1 // SPDX-License-Identifier: GPL-2.0-only 2 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com 3 * Copyright (c) 2016 Facebook 4 * Copyright (c) 2018 Covalent IO, Inc. http://covalent.io 5 */ 6 #include <uapi/linux/btf.h> 7 #include <linux/bpf-cgroup.h> 8 #include <linux/kernel.h> 9 #include <linux/types.h> 10 #include <linux/slab.h> 11 #include <linux/bpf.h> 12 #include <linux/btf.h> 13 #include <linux/bpf_verifier.h> 14 #include <linux/filter.h> 15 #include <net/netlink.h> 16 #include <linux/file.h> 17 #include <linux/vmalloc.h> 18 #include <linux/stringify.h> 19 #include <linux/bsearch.h> 20 #include <linux/sort.h> 21 #include <linux/perf_event.h> 22 #include <linux/ctype.h> 23 #include <linux/error-injection.h> 24 #include <linux/bpf_lsm.h> 25 #include <linux/btf_ids.h> 26 #include <linux/poison.h> 27 #include <linux/module.h> 28 #include <linux/cpumask.h> 29 #include <linux/bpf_mem_alloc.h> 30 #include <net/xdp.h> 31 #include <linux/trace_events.h> 32 #include <linux/kallsyms.h> 33 34 #include "disasm.h" 35 36 static const struct bpf_verifier_ops * const bpf_verifier_ops[] = { 37 #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \ 38 [_id] = & _name ## _verifier_ops, 39 #define BPF_MAP_TYPE(_id, _ops) 40 #define BPF_LINK_TYPE(_id, _name) 41 #include <linux/bpf_types.h> 42 #undef BPF_PROG_TYPE 43 #undef BPF_MAP_TYPE 44 #undef BPF_LINK_TYPE 45 }; 46 47 enum bpf_features { 48 BPF_FEAT_RDONLY_CAST_TO_VOID = 0, 49 BPF_FEAT_STREAMS = 1, 50 __MAX_BPF_FEAT, 51 }; 52 53 struct bpf_mem_alloc bpf_global_percpu_ma; 54 static bool bpf_global_percpu_ma_set; 55 56 /* bpf_check() is a static code analyzer that walks eBPF program 57 * instruction by instruction and updates register/stack state. 58 * All paths of conditional branches are analyzed until 'bpf_exit' insn. 59 * 60 * The first pass is depth-first-search to check that the program is a DAG. 
61 * It rejects the following programs: 62 * - larger than BPF_MAXINSNS insns 63 * - if loop is present (detected via back-edge) 64 * - unreachable insns exist (shouldn't be a forest. program = one function) 65 * - out of bounds or malformed jumps 66 * The second pass is all possible path descent from the 1st insn. 67 * Since it's analyzing all paths through the program, the length of the 68 * analysis is limited to 64k insn, which may be hit even if total number of 69 * insn is less than 4K, but there are too many branches that change stack/regs. 70 * Number of 'branches to be analyzed' is limited to 1k 71 * 72 * On entry to each instruction, each register has a type, and the instruction 73 * changes the types of the registers depending on instruction semantics. 74 * If instruction is BPF_MOV64_REG(BPF_REG_1, BPF_REG_5), then type of R5 is 75 * copied to R1. 76 * 77 * All registers are 64-bit. 78 * R0 - return register 79 * R1-R5 argument passing registers 80 * R6-R9 callee saved registers 81 * R10 - frame pointer read-only 82 * 83 * At the start of BPF program the register R1 contains a pointer to bpf_context 84 * and has type PTR_TO_CTX. 85 * 86 * Verifier tracks arithmetic operations on pointers in case: 87 * BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), 88 * BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -20), 89 * 1st insn copies R10 (which has FRAME_PTR) type into R1 90 * and 2nd arithmetic instruction is pattern matched to recognize 91 * that it wants to construct a pointer to some element within stack. 92 * So after 2nd insn, the register R1 has type PTR_TO_STACK 93 * (and -20 constant is saved for further stack bounds checking). 94 * Meaning that this reg is a pointer to stack plus known immediate constant. 95 * 96 * Most of the time the registers have SCALAR_VALUE type, which 97 * means the register has some value, but it's not a valid pointer. 
98 * (like pointer plus pointer becomes SCALAR_VALUE type) 99 * 100 * When verifier sees load or store instructions the type of base register 101 * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK, PTR_TO_SOCKET. These are 102 * four pointer types recognized by check_mem_access() function. 103 * 104 * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value' 105 * and the range of [ptr, ptr + map's value_size) is accessible. 106 * 107 * registers used to pass values to function calls are checked against 108 * function argument constraints. 109 * 110 * ARG_PTR_TO_MAP_KEY is one of such argument constraints. 111 * It means that the register type passed to this function must be 112 * PTR_TO_STACK and it will be used inside the function as 113 * 'pointer to map element key' 114 * 115 * For example the argument constraints for bpf_map_lookup_elem(): 116 * .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, 117 * .arg1_type = ARG_CONST_MAP_PTR, 118 * .arg2_type = ARG_PTR_TO_MAP_KEY, 119 * 120 * ret_type says that this function returns 'pointer to map elem value or null' 121 * function expects 1st argument to be a const pointer to 'struct bpf_map' and 122 * 2nd argument should be a pointer to stack, which will be used inside 123 * the helper function as a pointer to map element key. 124 * 125 * On the kernel side the helper function looks like: 126 * u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) 127 * { 128 * struct bpf_map *map = (struct bpf_map *) (unsigned long) r1; 129 * void *key = (void *) (unsigned long) r2; 130 * void *value; 131 * 132 * here kernel can access 'key' and 'map' pointers safely, knowing that 133 * [key, key + map->key_size) bytes are valid and were initialized on 134 * the stack of eBPF program. 
135 * } 136 * 137 * Corresponding eBPF program may look like: 138 * BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), // after this insn R2 type is FRAME_PTR 139 * BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), // after this insn R2 type is PTR_TO_STACK 140 * BPF_LD_MAP_FD(BPF_REG_1, map_fd), // after this insn R1 type is CONST_PTR_TO_MAP 141 * BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), 142 * here verifier looks at prototype of map_lookup_elem() and sees: 143 * .arg1_type == ARG_CONST_MAP_PTR and R1->type == CONST_PTR_TO_MAP, which is ok, 144 * Now verifier knows that this map has key of R1->map_ptr->key_size bytes 145 * 146 * Then .arg2_type == ARG_PTR_TO_MAP_KEY and R2->type == PTR_TO_STACK, ok so far, 147 * Now verifier checks that [R2, R2 + map's key_size) are within stack limits 148 * and were initialized prior to this call. 149 * If it's ok, then verifier allows this BPF_CALL insn and looks at 150 * .ret_type which is RET_PTR_TO_MAP_VALUE_OR_NULL, so it sets 151 * R0->type = PTR_TO_MAP_VALUE_OR_NULL which means bpf_map_lookup_elem() function 152 * returns either pointer to map value or NULL. 153 * 154 * When type PTR_TO_MAP_VALUE_OR_NULL passes through 'if (reg != 0) goto +off' 155 * insn, the register holding that pointer in the true branch changes state to 156 * PTR_TO_MAP_VALUE and the same register changes state to CONST_IMM in the false 157 * branch. See check_cond_jmp_op(). 158 * 159 * After the call R0 is set to return type of the function and registers R1-R5 160 * are set to NOT_INIT to indicate that they are no longer readable. 161 * 162 * The following reference types represent a potential reference to a kernel 163 * resource which, after first being allocated, must be checked and freed by 164 * the BPF program: 165 * - PTR_TO_SOCKET_OR_NULL, PTR_TO_SOCKET 166 * 167 * When the verifier sees a helper call return a reference type, it allocates a 168 * pointer id for the reference and stores it in the current function state. 
169 * Similar to the way that PTR_TO_MAP_VALUE_OR_NULL is converted into 170 * PTR_TO_MAP_VALUE, PTR_TO_SOCKET_OR_NULL becomes PTR_TO_SOCKET when the type 171 * passes through a NULL-check conditional. For the branch wherein the state is 172 * changed to CONST_IMM, the verifier releases the reference. 173 * 174 * For each helper function that allocates a reference, such as 175 * bpf_sk_lookup_tcp(), there is a corresponding release function, such as 176 * bpf_sk_release(). When a reference type passes into the release function, 177 * the verifier also releases the reference. If any unchecked or unreleased 178 * reference remains at the end of the program, the verifier rejects it. 179 */ 180 181 /* verifier_state + insn_idx are pushed to stack when branch is encountered */ 182 struct bpf_verifier_stack_elem { 183 /* verifier state is 'st' 184 * before processing instruction 'insn_idx' 185 * and after processing instruction 'prev_insn_idx' 186 */ 187 struct bpf_verifier_state st; 188 int insn_idx; 189 int prev_insn_idx; 190 struct bpf_verifier_stack_elem *next; 191 /* length of verifier log at the time this state was pushed on stack */ 192 u32 log_pos; 193 }; 194 195 #define BPF_COMPLEXITY_LIMIT_JMP_SEQ 8192 196 #define BPF_COMPLEXITY_LIMIT_STATES 64 197 198 #define BPF_MAP_KEY_POISON (1ULL << 63) 199 #define BPF_MAP_KEY_SEEN (1ULL << 62) 200 201 #define BPF_GLOBAL_PERCPU_MA_MAX_SIZE 512 202 203 #define BPF_PRIV_STACK_MIN_SIZE 64 204 205 static int acquire_reference(struct bpf_verifier_env *env, int insn_idx); 206 static int release_reference_nomark(struct bpf_verifier_state *state, int ref_obj_id); 207 static int release_reference(struct bpf_verifier_env *env, int ref_obj_id); 208 static void invalidate_non_owning_refs(struct bpf_verifier_env *env); 209 static bool in_rbtree_lock_required_cb(struct bpf_verifier_env *env); 210 static int ref_set_non_owning(struct bpf_verifier_env *env, 211 struct bpf_reg_state *reg); 212 static bool is_trusted_reg(const struct 
bpf_reg_state *reg); 213 214 static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux) 215 { 216 return aux->map_ptr_state.poison; 217 } 218 219 static bool bpf_map_ptr_unpriv(const struct bpf_insn_aux_data *aux) 220 { 221 return aux->map_ptr_state.unpriv; 222 } 223 224 static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux, 225 struct bpf_map *map, 226 bool unpriv, bool poison) 227 { 228 unpriv |= bpf_map_ptr_unpriv(aux); 229 aux->map_ptr_state.unpriv = unpriv; 230 aux->map_ptr_state.poison = poison; 231 aux->map_ptr_state.map_ptr = map; 232 } 233 234 static bool bpf_map_key_poisoned(const struct bpf_insn_aux_data *aux) 235 { 236 return aux->map_key_state & BPF_MAP_KEY_POISON; 237 } 238 239 static bool bpf_map_key_unseen(const struct bpf_insn_aux_data *aux) 240 { 241 return !(aux->map_key_state & BPF_MAP_KEY_SEEN); 242 } 243 244 static u64 bpf_map_key_immediate(const struct bpf_insn_aux_data *aux) 245 { 246 return aux->map_key_state & ~(BPF_MAP_KEY_SEEN | BPF_MAP_KEY_POISON); 247 } 248 249 static void bpf_map_key_store(struct bpf_insn_aux_data *aux, u64 state) 250 { 251 bool poisoned = bpf_map_key_poisoned(aux); 252 253 aux->map_key_state = state | BPF_MAP_KEY_SEEN | 254 (poisoned ? 
BPF_MAP_KEY_POISON : 0ULL); 255 } 256 257 static bool bpf_helper_call(const struct bpf_insn *insn) 258 { 259 return insn->code == (BPF_JMP | BPF_CALL) && 260 insn->src_reg == 0; 261 } 262 263 static bool bpf_pseudo_call(const struct bpf_insn *insn) 264 { 265 return insn->code == (BPF_JMP | BPF_CALL) && 266 insn->src_reg == BPF_PSEUDO_CALL; 267 } 268 269 static bool bpf_pseudo_kfunc_call(const struct bpf_insn *insn) 270 { 271 return insn->code == (BPF_JMP | BPF_CALL) && 272 insn->src_reg == BPF_PSEUDO_KFUNC_CALL; 273 } 274 275 struct bpf_map_desc { 276 struct bpf_map *ptr; 277 int uid; 278 }; 279 280 struct bpf_call_arg_meta { 281 struct bpf_map_desc map; 282 bool raw_mode; 283 bool pkt_access; 284 u8 release_regno; 285 int regno; 286 int access_size; 287 int mem_size; 288 u64 msize_max_value; 289 int ref_obj_id; 290 int dynptr_id; 291 int func_id; 292 struct btf *btf; 293 u32 btf_id; 294 struct btf *ret_btf; 295 u32 ret_btf_id; 296 u32 subprogno; 297 struct btf_field *kptr_field; 298 s64 const_map_key; 299 }; 300 301 struct bpf_kfunc_meta { 302 struct btf *btf; 303 const struct btf_type *proto; 304 const char *name; 305 const u32 *flags; 306 s32 id; 307 }; 308 309 struct bpf_kfunc_call_arg_meta { 310 /* In parameters */ 311 struct btf *btf; 312 u32 func_id; 313 u32 kfunc_flags; 314 const struct btf_type *func_proto; 315 const char *func_name; 316 /* Out parameters */ 317 u32 ref_obj_id; 318 u8 release_regno; 319 bool r0_rdonly; 320 u32 ret_btf_id; 321 u64 r0_size; 322 u32 subprogno; 323 struct { 324 u64 value; 325 bool found; 326 } arg_constant; 327 328 /* arg_{btf,btf_id,owning_ref} are used by kfunc-specific handling, 329 * generally to pass info about user-defined local kptr types to later 330 * verification logic 331 * bpf_obj_drop/bpf_percpu_obj_drop 332 * Record the local kptr type to be drop'd 333 * bpf_refcount_acquire (via KF_ARG_PTR_TO_REFCOUNTED_KPTR arg type) 334 * Record the local kptr type to be refcount_incr'd and use 335 * arg_owning_ref to 
determine whether refcount_acquire should be 336 * fallible 337 */ 338 struct btf *arg_btf; 339 u32 arg_btf_id; 340 bool arg_owning_ref; 341 bool arg_prog; 342 343 struct { 344 struct btf_field *field; 345 } arg_list_head; 346 struct { 347 struct btf_field *field; 348 } arg_rbtree_root; 349 struct { 350 enum bpf_dynptr_type type; 351 u32 id; 352 u32 ref_obj_id; 353 } initialized_dynptr; 354 struct { 355 u8 spi; 356 u8 frameno; 357 } iter; 358 struct bpf_map_desc map; 359 u64 mem_size; 360 }; 361 362 struct btf *btf_vmlinux; 363 364 static const char *btf_type_name(const struct btf *btf, u32 id) 365 { 366 return btf_name_by_offset(btf, btf_type_by_id(btf, id)->name_off); 367 } 368 369 static DEFINE_MUTEX(bpf_verifier_lock); 370 static DEFINE_MUTEX(bpf_percpu_ma_lock); 371 372 __printf(2, 3) static void verbose(void *private_data, const char *fmt, ...) 373 { 374 struct bpf_verifier_env *env = private_data; 375 va_list args; 376 377 if (!bpf_verifier_log_needed(&env->log)) 378 return; 379 380 va_start(args, fmt); 381 bpf_verifier_vlog(&env->log, fmt, args); 382 va_end(args); 383 } 384 385 static void verbose_invalid_scalar(struct bpf_verifier_env *env, 386 struct bpf_reg_state *reg, 387 struct bpf_retval_range range, const char *ctx, 388 const char *reg_name) 389 { 390 bool unknown = true; 391 392 verbose(env, "%s the register %s has", ctx, reg_name); 393 if (reg->smin_value > S64_MIN) { 394 verbose(env, " smin=%lld", reg->smin_value); 395 unknown = false; 396 } 397 if (reg->smax_value < S64_MAX) { 398 verbose(env, " smax=%lld", reg->smax_value); 399 unknown = false; 400 } 401 if (unknown) 402 verbose(env, " unknown scalar value"); 403 verbose(env, " should have been in [%d, %d]\n", range.minval, range.maxval); 404 } 405 406 static bool reg_not_null(const struct bpf_reg_state *reg) 407 { 408 enum bpf_reg_type type; 409 410 type = reg->type; 411 if (type_may_be_null(type)) 412 return false; 413 414 type = base_type(type); 415 return type == PTR_TO_SOCKET || 416 type == 
PTR_TO_TCP_SOCK || 417 type == PTR_TO_MAP_VALUE || 418 type == PTR_TO_MAP_KEY || 419 type == PTR_TO_SOCK_COMMON || 420 (type == PTR_TO_BTF_ID && is_trusted_reg(reg)) || 421 (type == PTR_TO_MEM && !(reg->type & PTR_UNTRUSTED)) || 422 type == CONST_PTR_TO_MAP; 423 } 424 425 static struct btf_record *reg_btf_record(const struct bpf_reg_state *reg) 426 { 427 struct btf_record *rec = NULL; 428 struct btf_struct_meta *meta; 429 430 if (reg->type == PTR_TO_MAP_VALUE) { 431 rec = reg->map_ptr->record; 432 } else if (type_is_ptr_alloc_obj(reg->type)) { 433 meta = btf_find_struct_meta(reg->btf, reg->btf_id); 434 if (meta) 435 rec = meta->record; 436 } 437 return rec; 438 } 439 440 static bool subprog_is_global(const struct bpf_verifier_env *env, int subprog) 441 { 442 struct bpf_func_info_aux *aux = env->prog->aux->func_info_aux; 443 444 return aux && aux[subprog].linkage == BTF_FUNC_GLOBAL; 445 } 446 447 static const char *subprog_name(const struct bpf_verifier_env *env, int subprog) 448 { 449 struct bpf_func_info *info; 450 451 if (!env->prog->aux->func_info) 452 return ""; 453 454 info = &env->prog->aux->func_info[subprog]; 455 return btf_type_name(env->prog->aux->btf, info->type_id); 456 } 457 458 static void mark_subprog_exc_cb(struct bpf_verifier_env *env, int subprog) 459 { 460 struct bpf_subprog_info *info = subprog_info(env, subprog); 461 462 info->is_cb = true; 463 info->is_async_cb = true; 464 info->is_exception_cb = true; 465 } 466 467 static bool subprog_is_exc_cb(struct bpf_verifier_env *env, int subprog) 468 { 469 return subprog_info(env, subprog)->is_exception_cb; 470 } 471 472 static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg) 473 { 474 return btf_record_has_field(reg_btf_record(reg), BPF_SPIN_LOCK | BPF_RES_SPIN_LOCK); 475 } 476 477 static bool type_is_rdonly_mem(u32 type) 478 { 479 return type & MEM_RDONLY; 480 } 481 482 static bool is_acquire_function(enum bpf_func_id func_id, 483 const struct bpf_map *map) 484 { 485 enum bpf_map_type 
map_type = map ? map->map_type : BPF_MAP_TYPE_UNSPEC; 486 487 if (func_id == BPF_FUNC_sk_lookup_tcp || 488 func_id == BPF_FUNC_sk_lookup_udp || 489 func_id == BPF_FUNC_skc_lookup_tcp || 490 func_id == BPF_FUNC_ringbuf_reserve || 491 func_id == BPF_FUNC_kptr_xchg) 492 return true; 493 494 if (func_id == BPF_FUNC_map_lookup_elem && 495 (map_type == BPF_MAP_TYPE_SOCKMAP || 496 map_type == BPF_MAP_TYPE_SOCKHASH)) 497 return true; 498 499 return false; 500 } 501 502 static bool is_ptr_cast_function(enum bpf_func_id func_id) 503 { 504 return func_id == BPF_FUNC_tcp_sock || 505 func_id == BPF_FUNC_sk_fullsock || 506 func_id == BPF_FUNC_skc_to_tcp_sock || 507 func_id == BPF_FUNC_skc_to_tcp6_sock || 508 func_id == BPF_FUNC_skc_to_udp6_sock || 509 func_id == BPF_FUNC_skc_to_mptcp_sock || 510 func_id == BPF_FUNC_skc_to_tcp_timewait_sock || 511 func_id == BPF_FUNC_skc_to_tcp_request_sock; 512 } 513 514 static bool is_dynptr_ref_function(enum bpf_func_id func_id) 515 { 516 return func_id == BPF_FUNC_dynptr_data; 517 } 518 519 static bool is_sync_callback_calling_kfunc(u32 btf_id); 520 static bool is_async_callback_calling_kfunc(u32 btf_id); 521 static bool is_callback_calling_kfunc(u32 btf_id); 522 static bool is_bpf_throw_kfunc(struct bpf_insn *insn); 523 524 static bool is_bpf_wq_set_callback_kfunc(u32 btf_id); 525 static bool is_task_work_add_kfunc(u32 func_id); 526 527 static bool is_sync_callback_calling_function(enum bpf_func_id func_id) 528 { 529 return func_id == BPF_FUNC_for_each_map_elem || 530 func_id == BPF_FUNC_find_vma || 531 func_id == BPF_FUNC_loop || 532 func_id == BPF_FUNC_user_ringbuf_drain; 533 } 534 535 static bool is_async_callback_calling_function(enum bpf_func_id func_id) 536 { 537 return func_id == BPF_FUNC_timer_set_callback; 538 } 539 540 static bool is_callback_calling_function(enum bpf_func_id func_id) 541 { 542 return is_sync_callback_calling_function(func_id) || 543 is_async_callback_calling_function(func_id); 544 } 545 546 static bool 
is_sync_callback_calling_insn(struct bpf_insn *insn) 547 { 548 return (bpf_helper_call(insn) && is_sync_callback_calling_function(insn->imm)) || 549 (bpf_pseudo_kfunc_call(insn) && is_sync_callback_calling_kfunc(insn->imm)); 550 } 551 552 static bool is_async_callback_calling_insn(struct bpf_insn *insn) 553 { 554 return (bpf_helper_call(insn) && is_async_callback_calling_function(insn->imm)) || 555 (bpf_pseudo_kfunc_call(insn) && is_async_callback_calling_kfunc(insn->imm)); 556 } 557 558 static bool is_async_cb_sleepable(struct bpf_verifier_env *env, struct bpf_insn *insn) 559 { 560 /* bpf_timer callbacks are never sleepable. */ 561 if (bpf_helper_call(insn) && insn->imm == BPF_FUNC_timer_set_callback) 562 return false; 563 564 /* bpf_wq and bpf_task_work callbacks are always sleepable. */ 565 if (bpf_pseudo_kfunc_call(insn) && insn->off == 0 && 566 (is_bpf_wq_set_callback_kfunc(insn->imm) || is_task_work_add_kfunc(insn->imm))) 567 return true; 568 569 verifier_bug(env, "unhandled async callback in is_async_cb_sleepable"); 570 return false; 571 } 572 573 static bool is_may_goto_insn(struct bpf_insn *insn) 574 { 575 return insn->code == (BPF_JMP | BPF_JCOND) && insn->src_reg == BPF_MAY_GOTO; 576 } 577 578 static bool is_may_goto_insn_at(struct bpf_verifier_env *env, int insn_idx) 579 { 580 return is_may_goto_insn(&env->prog->insnsi[insn_idx]); 581 } 582 583 static bool is_storage_get_function(enum bpf_func_id func_id) 584 { 585 return func_id == BPF_FUNC_sk_storage_get || 586 func_id == BPF_FUNC_inode_storage_get || 587 func_id == BPF_FUNC_task_storage_get || 588 func_id == BPF_FUNC_cgrp_storage_get; 589 } 590 591 static bool helper_multiple_ref_obj_use(enum bpf_func_id func_id, 592 const struct bpf_map *map) 593 { 594 int ref_obj_uses = 0; 595 596 if (is_ptr_cast_function(func_id)) 597 ref_obj_uses++; 598 if (is_acquire_function(func_id, map)) 599 ref_obj_uses++; 600 if (is_dynptr_ref_function(func_id)) 601 ref_obj_uses++; 602 603 return ref_obj_uses > 1; 604 } 605 
606 static bool is_cmpxchg_insn(const struct bpf_insn *insn) 607 { 608 return BPF_CLASS(insn->code) == BPF_STX && 609 BPF_MODE(insn->code) == BPF_ATOMIC && 610 insn->imm == BPF_CMPXCHG; 611 } 612 613 static bool is_atomic_load_insn(const struct bpf_insn *insn) 614 { 615 return BPF_CLASS(insn->code) == BPF_STX && 616 BPF_MODE(insn->code) == BPF_ATOMIC && 617 insn->imm == BPF_LOAD_ACQ; 618 } 619 620 static int __get_spi(s32 off) 621 { 622 return (-off - 1) / BPF_REG_SIZE; 623 } 624 625 static struct bpf_func_state *func(struct bpf_verifier_env *env, 626 const struct bpf_reg_state *reg) 627 { 628 struct bpf_verifier_state *cur = env->cur_state; 629 630 return cur->frame[reg->frameno]; 631 } 632 633 static bool is_spi_bounds_valid(struct bpf_func_state *state, int spi, int nr_slots) 634 { 635 int allocated_slots = state->allocated_stack / BPF_REG_SIZE; 636 637 /* We need to check that slots between [spi - nr_slots + 1, spi] are 638 * within [0, allocated_stack). 639 * 640 * Please note that the spi grows downwards. For example, a dynptr 641 * takes the size of two stack slots; the first slot will be at 642 * spi and the second slot will be at spi - 1. 
643 */ 644 return spi - nr_slots + 1 >= 0 && spi < allocated_slots; 645 } 646 647 static int stack_slot_obj_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg, 648 const char *obj_kind, int nr_slots) 649 { 650 int off, spi; 651 652 if (!tnum_is_const(reg->var_off)) { 653 verbose(env, "%s has to be at a constant offset\n", obj_kind); 654 return -EINVAL; 655 } 656 657 off = reg->off + reg->var_off.value; 658 if (off % BPF_REG_SIZE) { 659 verbose(env, "cannot pass in %s at an offset=%d\n", obj_kind, off); 660 return -EINVAL; 661 } 662 663 spi = __get_spi(off); 664 if (spi + 1 < nr_slots) { 665 verbose(env, "cannot pass in %s at an offset=%d\n", obj_kind, off); 666 return -EINVAL; 667 } 668 669 if (!is_spi_bounds_valid(func(env, reg), spi, nr_slots)) 670 return -ERANGE; 671 return spi; 672 } 673 674 static int dynptr_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg) 675 { 676 return stack_slot_obj_get_spi(env, reg, "dynptr", BPF_DYNPTR_NR_SLOTS); 677 } 678 679 static int iter_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg, int nr_slots) 680 { 681 return stack_slot_obj_get_spi(env, reg, "iter", nr_slots); 682 } 683 684 static int irq_flag_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg) 685 { 686 return stack_slot_obj_get_spi(env, reg, "irq_flag", 1); 687 } 688 689 static enum bpf_dynptr_type arg_to_dynptr_type(enum bpf_arg_type arg_type) 690 { 691 switch (arg_type & DYNPTR_TYPE_FLAG_MASK) { 692 case DYNPTR_TYPE_LOCAL: 693 return BPF_DYNPTR_TYPE_LOCAL; 694 case DYNPTR_TYPE_RINGBUF: 695 return BPF_DYNPTR_TYPE_RINGBUF; 696 case DYNPTR_TYPE_SKB: 697 return BPF_DYNPTR_TYPE_SKB; 698 case DYNPTR_TYPE_XDP: 699 return BPF_DYNPTR_TYPE_XDP; 700 case DYNPTR_TYPE_SKB_META: 701 return BPF_DYNPTR_TYPE_SKB_META; 702 case DYNPTR_TYPE_FILE: 703 return BPF_DYNPTR_TYPE_FILE; 704 default: 705 return BPF_DYNPTR_TYPE_INVALID; 706 } 707 } 708 709 static enum bpf_type_flag get_dynptr_type_flag(enum bpf_dynptr_type type) 710 { 711 
switch (type) { 712 case BPF_DYNPTR_TYPE_LOCAL: 713 return DYNPTR_TYPE_LOCAL; 714 case BPF_DYNPTR_TYPE_RINGBUF: 715 return DYNPTR_TYPE_RINGBUF; 716 case BPF_DYNPTR_TYPE_SKB: 717 return DYNPTR_TYPE_SKB; 718 case BPF_DYNPTR_TYPE_XDP: 719 return DYNPTR_TYPE_XDP; 720 case BPF_DYNPTR_TYPE_SKB_META: 721 return DYNPTR_TYPE_SKB_META; 722 case BPF_DYNPTR_TYPE_FILE: 723 return DYNPTR_TYPE_FILE; 724 default: 725 return 0; 726 } 727 } 728 729 static bool dynptr_type_refcounted(enum bpf_dynptr_type type) 730 { 731 return type == BPF_DYNPTR_TYPE_RINGBUF || type == BPF_DYNPTR_TYPE_FILE; 732 } 733 734 static void __mark_dynptr_reg(struct bpf_reg_state *reg, 735 enum bpf_dynptr_type type, 736 bool first_slot, int dynptr_id); 737 738 static void __mark_reg_not_init(const struct bpf_verifier_env *env, 739 struct bpf_reg_state *reg); 740 741 static void mark_dynptr_stack_regs(struct bpf_verifier_env *env, 742 struct bpf_reg_state *sreg1, 743 struct bpf_reg_state *sreg2, 744 enum bpf_dynptr_type type) 745 { 746 int id = ++env->id_gen; 747 748 __mark_dynptr_reg(sreg1, type, true, id); 749 __mark_dynptr_reg(sreg2, type, false, id); 750 } 751 752 static void mark_dynptr_cb_reg(struct bpf_verifier_env *env, 753 struct bpf_reg_state *reg, 754 enum bpf_dynptr_type type) 755 { 756 __mark_dynptr_reg(reg, type, true, ++env->id_gen); 757 } 758 759 static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env, 760 struct bpf_func_state *state, int spi); 761 762 static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg, 763 enum bpf_arg_type arg_type, int insn_idx, int clone_ref_obj_id) 764 { 765 struct bpf_func_state *state = func(env, reg); 766 enum bpf_dynptr_type type; 767 int spi, i, err; 768 769 spi = dynptr_get_spi(env, reg); 770 if (spi < 0) 771 return spi; 772 773 /* We cannot assume both spi and spi - 1 belong to the same dynptr, 774 * hence we need to call destroy_if_dynptr_stack_slot twice for both, 775 * to ensure that for the following example: 
776 * [d1][d1][d2][d2] 777 * spi 3 2 1 0 778 * So marking spi = 2 should lead to destruction of both d1 and d2. In 779 * case they do belong to same dynptr, second call won't see slot_type 780 * as STACK_DYNPTR and will simply skip destruction. 781 */ 782 err = destroy_if_dynptr_stack_slot(env, state, spi); 783 if (err) 784 return err; 785 err = destroy_if_dynptr_stack_slot(env, state, spi - 1); 786 if (err) 787 return err; 788 789 for (i = 0; i < BPF_REG_SIZE; i++) { 790 state->stack[spi].slot_type[i] = STACK_DYNPTR; 791 state->stack[spi - 1].slot_type[i] = STACK_DYNPTR; 792 } 793 794 type = arg_to_dynptr_type(arg_type); 795 if (type == BPF_DYNPTR_TYPE_INVALID) 796 return -EINVAL; 797 798 mark_dynptr_stack_regs(env, &state->stack[spi].spilled_ptr, 799 &state->stack[spi - 1].spilled_ptr, type); 800 801 if (dynptr_type_refcounted(type)) { 802 /* The id is used to track proper releasing */ 803 int id; 804 805 if (clone_ref_obj_id) 806 id = clone_ref_obj_id; 807 else 808 id = acquire_reference(env, insn_idx); 809 810 if (id < 0) 811 return id; 812 813 state->stack[spi].spilled_ptr.ref_obj_id = id; 814 state->stack[spi - 1].spilled_ptr.ref_obj_id = id; 815 } 816 817 bpf_mark_stack_write(env, state->frameno, BIT(spi - 1) | BIT(spi)); 818 819 return 0; 820 } 821 822 static void invalidate_dynptr(struct bpf_verifier_env *env, struct bpf_func_state *state, int spi) 823 { 824 int i; 825 826 for (i = 0; i < BPF_REG_SIZE; i++) { 827 state->stack[spi].slot_type[i] = STACK_INVALID; 828 state->stack[spi - 1].slot_type[i] = STACK_INVALID; 829 } 830 831 __mark_reg_not_init(env, &state->stack[spi].spilled_ptr); 832 __mark_reg_not_init(env, &state->stack[spi - 1].spilled_ptr); 833 834 bpf_mark_stack_write(env, state->frameno, BIT(spi - 1) | BIT(spi)); 835 } 836 837 static int unmark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg) 838 { 839 struct bpf_func_state *state = func(env, reg); 840 int spi, ref_obj_id, i; 841 842 /* 843 * This can only be set for 
PTR_TO_STACK, as CONST_PTR_TO_DYNPTR cannot 844 * be released by any dynptr helper. Hence, unmark_stack_slots_dynptr 845 * is safe to do directly. 846 */ 847 if (reg->type == CONST_PTR_TO_DYNPTR) { 848 verifier_bug(env, "CONST_PTR_TO_DYNPTR cannot be released"); 849 return -EFAULT; 850 } 851 spi = dynptr_get_spi(env, reg); 852 if (spi < 0) 853 return spi; 854 855 if (!dynptr_type_refcounted(state->stack[spi].spilled_ptr.dynptr.type)) { 856 invalidate_dynptr(env, state, spi); 857 return 0; 858 } 859 860 ref_obj_id = state->stack[spi].spilled_ptr.ref_obj_id; 861 862 /* If the dynptr has a ref_obj_id, then we need to invalidate 863 * two things: 864 * 865 * 1) Any dynptrs with a matching ref_obj_id (clones) 866 * 2) Any slices derived from this dynptr. 867 */ 868 869 /* Invalidate any slices associated with this dynptr */ 870 WARN_ON_ONCE(release_reference(env, ref_obj_id)); 871 872 /* Invalidate any dynptr clones */ 873 for (i = 1; i < state->allocated_stack / BPF_REG_SIZE; i++) { 874 if (state->stack[i].spilled_ptr.ref_obj_id != ref_obj_id) 875 continue; 876 877 /* it should always be the case that if the ref obj id 878 * matches then the stack slot also belongs to a 879 * dynptr 880 */ 881 if (state->stack[i].slot_type[0] != STACK_DYNPTR) { 882 verifier_bug(env, "misconfigured ref_obj_id"); 883 return -EFAULT; 884 } 885 if (state->stack[i].spilled_ptr.dynptr.first_slot) 886 invalidate_dynptr(env, state, i); 887 } 888 889 return 0; 890 } 891 892 static void __mark_reg_unknown(const struct bpf_verifier_env *env, 893 struct bpf_reg_state *reg); 894 895 static void mark_reg_invalid(const struct bpf_verifier_env *env, struct bpf_reg_state *reg) 896 { 897 if (!env->allow_ptr_leaks) 898 __mark_reg_not_init(env, reg); 899 else 900 __mark_reg_unknown(env, reg); 901 } 902 903 static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env, 904 struct bpf_func_state *state, int spi) 905 { 906 struct bpf_func_state *fstate; 907 struct bpf_reg_state *dreg; 908 int i, 
dynptr_id; 909 910 /* We always ensure that STACK_DYNPTR is never set partially, 911 * hence just checking for slot_type[0] is enough. This is 912 * different for STACK_SPILL, where it may be only set for 913 * 1 byte, so code has to use is_spilled_reg. 914 */ 915 if (state->stack[spi].slot_type[0] != STACK_DYNPTR) 916 return 0; 917 918 /* Reposition spi to first slot */ 919 if (!state->stack[spi].spilled_ptr.dynptr.first_slot) 920 spi = spi + 1; 921 922 if (dynptr_type_refcounted(state->stack[spi].spilled_ptr.dynptr.type)) { 923 verbose(env, "cannot overwrite referenced dynptr\n"); 924 return -EINVAL; 925 } 926 927 mark_stack_slot_scratched(env, spi); 928 mark_stack_slot_scratched(env, spi - 1); 929 930 /* Writing partially to one dynptr stack slot destroys both. */ 931 for (i = 0; i < BPF_REG_SIZE; i++) { 932 state->stack[spi].slot_type[i] = STACK_INVALID; 933 state->stack[spi - 1].slot_type[i] = STACK_INVALID; 934 } 935 936 dynptr_id = state->stack[spi].spilled_ptr.id; 937 /* Invalidate any slices associated with this dynptr */ 938 bpf_for_each_reg_in_vstate(env->cur_state, fstate, dreg, ({ 939 /* Dynptr slices are only PTR_TO_MEM_OR_NULL and PTR_TO_MEM */ 940 if (dreg->type != (PTR_TO_MEM | PTR_MAYBE_NULL) && dreg->type != PTR_TO_MEM) 941 continue; 942 if (dreg->dynptr_id == dynptr_id) 943 mark_reg_invalid(env, dreg); 944 })); 945 946 /* Do not release reference state, we are destroying dynptr on stack, 947 * not using some helper to release it. Just reset register. 948 */ 949 __mark_reg_not_init(env, &state->stack[spi].spilled_ptr); 950 __mark_reg_not_init(env, &state->stack[spi - 1].spilled_ptr); 951 952 bpf_mark_stack_write(env, state->frameno, BIT(spi - 1) | BIT(spi)); 953 954 return 0; 955 } 956 957 static bool is_dynptr_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_reg_state *reg) 958 { 959 int spi; 960 961 if (reg->type == CONST_PTR_TO_DYNPTR) 962 return false; 963 964 spi = dynptr_get_spi(env, reg); 965 966 /* -ERANGE (i.e. 
spi not falling into allocated stack slots) isn't an 967 * error because this just means the stack state hasn't been updated yet. 968 * We will do check_mem_access to check and update stack bounds later. 969 */ 970 if (spi < 0 && spi != -ERANGE) 971 return false; 972 973 /* We don't need to check if the stack slots are marked by previous 974 * dynptr initializations because we allow overwriting existing unreferenced 975 * STACK_DYNPTR slots, see mark_stack_slots_dynptr which calls 976 * destroy_if_dynptr_stack_slot to ensure dynptr objects at the slots we are 977 * touching are completely destructed before we reinitialize them for a new 978 * one. For referenced ones, destroy_if_dynptr_stack_slot returns an error early 979 * instead of delaying it until the end where the user will get "Unreleased 980 * reference" error. 981 */ 982 return true; 983 } 984 985 static bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg) 986 { 987 struct bpf_func_state *state = func(env, reg); 988 int i, spi; 989 990 /* This already represents first slot of initialized bpf_dynptr. 991 * 992 * CONST_PTR_TO_DYNPTR already has fixed and var_off as 0 due to 993 * check_func_arg_reg_off's logic, so we don't need to check its 994 * offset and alignment. 
*/
	if (reg->type == CONST_PTR_TO_DYNPTR)
		return true;

	spi = dynptr_get_spi(env, reg);
	if (spi < 0)
		return false;
	if (!state->stack[spi].spilled_ptr.dynptr.first_slot)
		return false;

	/* a dynptr occupies two slots; both must be fully marked */
	for (i = 0; i < BPF_REG_SIZE; i++) {
		if (state->stack[spi].slot_type[i] != STACK_DYNPTR ||
		    state->stack[spi - 1].slot_type[i] != STACK_DYNPTR)
			return false;
	}

	return true;
}

/* Check whether the dynptr referenced by @reg has the dynptr type that
 * @arg_type asks for. ARG_PTR_TO_DYNPTR accepts any dynptr type. Otherwise the
 * type from arg_to_dynptr_type() is compared against reg->dynptr.type for
 * CONST_PTR_TO_DYNPTR, or against the type recorded in the stack slot found by
 * dynptr_get_spi(); a negative slot index yields false.
 */
static bool is_dynptr_type_expected(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
				    enum bpf_arg_type arg_type)
{
	struct bpf_func_state *state = func(env, reg);
	enum bpf_dynptr_type dynptr_type;
	int spi;

	/* ARG_PTR_TO_DYNPTR takes any type of dynptr */
	if (arg_type == ARG_PTR_TO_DYNPTR)
		return true;

	dynptr_type = arg_to_dynptr_type(arg_type);
	if (reg->type == CONST_PTR_TO_DYNPTR) {
		return reg->dynptr.type == dynptr_type;
	} else {
		spi = dynptr_get_spi(env, reg);
		if (spi < 0)
			return false;
		return state->stack[spi].spilled_ptr.dynptr.type == dynptr_type;
	}
}

/* Forward declarations; used by mark_stack_slots_iter() below. */
static void __mark_reg_known_zero(struct bpf_reg_state *reg);

static bool in_rcu_cs(struct bpf_verifier_env *env);

static bool is_kfunc_rcu_protected(struct bpf_kfunc_call_arg_meta *meta);

/* Mark @nr_slots stack slots, starting at the slot index returned by
 * iter_get_spi() and going down (spi, spi - 1, ...), as holding an active
 * iterator of BTF type (@btf, @btf_id). A new reference is acquired for
 * @insn_idx and its id is stored in the first slot only.
 *
 * Returns 0 on success, or the negative value from iter_get_spi() or
 * acquire_reference() on failure.
 */
static int mark_stack_slots_iter(struct bpf_verifier_env *env,
				 struct bpf_kfunc_call_arg_meta *meta,
				 struct bpf_reg_state *reg, int insn_idx,
				 struct btf *btf, u32 btf_id, int nr_slots)
{
	struct bpf_func_state *state = func(env, reg);
	int spi, i, j, id;

	spi = iter_get_spi(env, reg, nr_slots);
	if (spi < 0)
		return spi;

	id = acquire_reference(env, insn_idx);
	if (id < 0)
		return id;

	for (i = 0; i < nr_slots; i++) {
		struct bpf_stack_state *slot = &state->stack[spi - i];
		struct bpf_reg_state *st = &slot->spilled_ptr;

		__mark_reg_known_zero(st);
st->type = PTR_TO_STACK; /* we don't have dedicated reg type */
		/* For kfuncs that is_kfunc_rcu_protected() reports, tag the slot
		 * MEM_RCU when inside an RCU critical section and PTR_UNTRUSTED
		 * otherwise.
		 */
		if (is_kfunc_rcu_protected(meta)) {
			if (in_rcu_cs(env))
				st->type |= MEM_RCU;
			else
				st->type |= PTR_UNTRUSTED;
		}
		/* only the first slot carries the reference id */
		st->ref_obj_id = i == 0 ? id : 0;
		st->iter.btf = btf;
		st->iter.btf_id = btf_id;
		st->iter.state = BPF_ITER_STATE_ACTIVE;
		st->iter.depth = 0;

		for (j = 0; j < BPF_REG_SIZE; j++)
			slot->slot_type[j] = STACK_ITER;

		bpf_mark_stack_write(env, state->frameno, BIT(spi - i));
		mark_stack_slot_scratched(env, spi - i);
	}

	return 0;
}

/* Undo mark_stack_slots_iter(): release the reference recorded in the first
 * slot and reset all @nr_slots slots to uninitialized registers with
 * STACK_INVALID slot types.
 *
 * Returns 0 on success or the negative value from iter_get_spi().
 */
static int unmark_stack_slots_iter(struct bpf_verifier_env *env,
				   struct bpf_reg_state *reg, int nr_slots)
{
	struct bpf_func_state *state = func(env, reg);
	int spi, i, j;

	spi = iter_get_spi(env, reg, nr_slots);
	if (spi < 0)
		return spi;

	for (i = 0; i < nr_slots; i++) {
		struct bpf_stack_state *slot = &state->stack[spi - i];
		struct bpf_reg_state *st = &slot->spilled_ptr;

		/* the reference id lives in the first slot only */
		if (i == 0)
			WARN_ON_ONCE(release_reference(env, st->ref_obj_id));

		__mark_reg_not_init(env, st);

		for (j = 0; j < BPF_REG_SIZE; j++)
			slot->slot_type[j] = STACK_INVALID;

		bpf_mark_stack_write(env, state->frameno, BIT(spi - i));
		mark_stack_slot_scratched(env, spi - i);
	}

	return 0;
}

/* Check whether the @nr_slots stack slots addressed by @reg can take a new
 * iterator: none of their bytes may already be STACK_ITER.
 */
static bool is_iter_reg_valid_uninit(struct bpf_verifier_env *env,
				     struct bpf_reg_state *reg, int nr_slots)
{
	struct bpf_func_state *state = func(env, reg);
	int spi, i, j;

	/* For -ERANGE (i.e. spi not falling into allocated stack slots), we
	 * will do check_mem_access to check and update stack bounds later, so
	 * return true for that case.
*/
	spi = iter_get_spi(env, reg, nr_slots);
	if (spi == -ERANGE)
		return true;
	if (spi < 0)
		return false;

	for (i = 0; i < nr_slots; i++) {
		struct bpf_stack_state *slot = &state->stack[spi - i];

		for (j = 0; j < BPF_REG_SIZE; j++)
			if (slot->slot_type[j] == STACK_ITER)
				return false;
	}

	return true;
}

/* Check that the @nr_slots stack slots addressed by @reg hold an initialized
 * iterator of BTF type (@btf, @btf_id).
 *
 * Returns 0 if valid, -EPROTO if a slot is marked PTR_UNTRUSTED, and -EINVAL
 * for any other mismatch (bad slot index, unexpected ref_obj_id placement,
 * different BTF type, or a byte that is not STACK_ITER).
 */
static int is_iter_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
				   struct btf *btf, u32 btf_id, int nr_slots)
{
	struct bpf_func_state *state = func(env, reg);
	int spi, i, j;

	spi = iter_get_spi(env, reg, nr_slots);
	if (spi < 0)
		return -EINVAL;

	for (i = 0; i < nr_slots; i++) {
		struct bpf_stack_state *slot = &state->stack[spi - i];
		struct bpf_reg_state *st = &slot->spilled_ptr;

		if (st->type & PTR_UNTRUSTED)
			return -EPROTO;
		/* only main (first) slot has ref_obj_id set */
		if (i == 0 && !st->ref_obj_id)
			return -EINVAL;
		if (i != 0 && st->ref_obj_id)
			return -EINVAL;
		if (st->iter.btf != btf || st->iter.btf_id != btf_id)
			return -EINVAL;

		for (j = 0; j < BPF_REG_SIZE; j++)
			if (slot->slot_type[j] != STACK_ITER)
				return -EINVAL;
	}

	return 0;
}

/* Forward declarations; used by the irq flag helpers below. */
static int acquire_irq_state(struct bpf_verifier_env *env, int insn_idx);
static int release_irq_state(struct bpf_verifier_state *state, int id);

/* Mark the stack slot addressed by @reg as holding saved IRQ flags. A new IRQ
 * reference is acquired for @insn_idx; its id and @kfunc_class are recorded in
 * the slot, and all slot bytes become STACK_IRQ_FLAG. @meta is not used here.
 *
 * Returns 0 on success, or the negative value from irq_flag_get_spi() or
 * acquire_irq_state() on failure.
 */
static int mark_stack_slot_irq_flag(struct bpf_verifier_env *env,
				     struct bpf_kfunc_call_arg_meta *meta,
				     struct bpf_reg_state *reg, int insn_idx,
				     int kfunc_class)
{
	struct bpf_func_state *state = func(env, reg);
	struct bpf_stack_state *slot;
	struct bpf_reg_state *st;
	int spi, i, id;

	spi = irq_flag_get_spi(env, reg);
	if (spi < 0)
		return spi;

	id = acquire_irq_state(env, insn_idx);
	if (id < 0)
		return id;

	slot = &state->stack[spi];
st = &slot->spilled_ptr; 1197 1198 bpf_mark_stack_write(env, reg->frameno, BIT(spi)); 1199 __mark_reg_known_zero(st); 1200 st->type = PTR_TO_STACK; /* we don't have dedicated reg type */ 1201 st->ref_obj_id = id; 1202 st->irq.kfunc_class = kfunc_class; 1203 1204 for (i = 0; i < BPF_REG_SIZE; i++) 1205 slot->slot_type[i] = STACK_IRQ_FLAG; 1206 1207 mark_stack_slot_scratched(env, spi); 1208 return 0; 1209 } 1210 1211 static int unmark_stack_slot_irq_flag(struct bpf_verifier_env *env, struct bpf_reg_state *reg, 1212 int kfunc_class) 1213 { 1214 struct bpf_func_state *state = func(env, reg); 1215 struct bpf_stack_state *slot; 1216 struct bpf_reg_state *st; 1217 int spi, i, err; 1218 1219 spi = irq_flag_get_spi(env, reg); 1220 if (spi < 0) 1221 return spi; 1222 1223 slot = &state->stack[spi]; 1224 st = &slot->spilled_ptr; 1225 1226 if (st->irq.kfunc_class != kfunc_class) { 1227 const char *flag_kfunc = st->irq.kfunc_class == IRQ_NATIVE_KFUNC ? "native" : "lock"; 1228 const char *used_kfunc = kfunc_class == IRQ_NATIVE_KFUNC ? 
"native" : "lock"; 1229 1230 verbose(env, "irq flag acquired by %s kfuncs cannot be restored with %s kfuncs\n", 1231 flag_kfunc, used_kfunc); 1232 return -EINVAL; 1233 } 1234 1235 err = release_irq_state(env->cur_state, st->ref_obj_id); 1236 WARN_ON_ONCE(err && err != -EACCES); 1237 if (err) { 1238 int insn_idx = 0; 1239 1240 for (int i = 0; i < env->cur_state->acquired_refs; i++) { 1241 if (env->cur_state->refs[i].id == env->cur_state->active_irq_id) { 1242 insn_idx = env->cur_state->refs[i].insn_idx; 1243 break; 1244 } 1245 } 1246 1247 verbose(env, "cannot restore irq state out of order, expected id=%d acquired at insn_idx=%d\n", 1248 env->cur_state->active_irq_id, insn_idx); 1249 return err; 1250 } 1251 1252 __mark_reg_not_init(env, st); 1253 1254 bpf_mark_stack_write(env, reg->frameno, BIT(spi)); 1255 1256 for (i = 0; i < BPF_REG_SIZE; i++) 1257 slot->slot_type[i] = STACK_INVALID; 1258 1259 mark_stack_slot_scratched(env, spi); 1260 return 0; 1261 } 1262 1263 static bool is_irq_flag_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_reg_state *reg) 1264 { 1265 struct bpf_func_state *state = func(env, reg); 1266 struct bpf_stack_state *slot; 1267 int spi, i; 1268 1269 /* For -ERANGE (i.e. spi not falling into allocated stack slots), we 1270 * will do check_mem_access to check and update stack bounds later, so 1271 * return true for that case. 
1272 */ 1273 spi = irq_flag_get_spi(env, reg); 1274 if (spi == -ERANGE) 1275 return true; 1276 if (spi < 0) 1277 return false; 1278 1279 slot = &state->stack[spi]; 1280 1281 for (i = 0; i < BPF_REG_SIZE; i++) 1282 if (slot->slot_type[i] == STACK_IRQ_FLAG) 1283 return false; 1284 return true; 1285 } 1286 1287 static int is_irq_flag_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg) 1288 { 1289 struct bpf_func_state *state = func(env, reg); 1290 struct bpf_stack_state *slot; 1291 struct bpf_reg_state *st; 1292 int spi, i; 1293 1294 spi = irq_flag_get_spi(env, reg); 1295 if (spi < 0) 1296 return -EINVAL; 1297 1298 slot = &state->stack[spi]; 1299 st = &slot->spilled_ptr; 1300 1301 if (!st->ref_obj_id) 1302 return -EINVAL; 1303 1304 for (i = 0; i < BPF_REG_SIZE; i++) 1305 if (slot->slot_type[i] != STACK_IRQ_FLAG) 1306 return -EINVAL; 1307 return 0; 1308 } 1309 1310 /* Check if given stack slot is "special": 1311 * - spilled register state (STACK_SPILL); 1312 * - dynptr state (STACK_DYNPTR); 1313 * - iter state (STACK_ITER). 1314 * - irq flag state (STACK_IRQ_FLAG) 1315 */ 1316 static bool is_stack_slot_special(const struct bpf_stack_state *stack) 1317 { 1318 enum bpf_stack_slot_type type = stack->slot_type[BPF_REG_SIZE - 1]; 1319 1320 switch (type) { 1321 case STACK_SPILL: 1322 case STACK_DYNPTR: 1323 case STACK_ITER: 1324 case STACK_IRQ_FLAG: 1325 return true; 1326 case STACK_INVALID: 1327 case STACK_MISC: 1328 case STACK_ZERO: 1329 return false; 1330 default: 1331 WARN_ONCE(1, "unknown stack slot type %d\n", type); 1332 return true; 1333 } 1334 } 1335 1336 /* The reg state of a pointer or a bounded scalar was saved when 1337 * it was spilled to the stack. 
1338 */ 1339 static bool is_spilled_reg(const struct bpf_stack_state *stack) 1340 { 1341 return stack->slot_type[BPF_REG_SIZE - 1] == STACK_SPILL; 1342 } 1343 1344 static bool is_spilled_scalar_reg(const struct bpf_stack_state *stack) 1345 { 1346 return stack->slot_type[BPF_REG_SIZE - 1] == STACK_SPILL && 1347 stack->spilled_ptr.type == SCALAR_VALUE; 1348 } 1349 1350 static bool is_spilled_scalar_reg64(const struct bpf_stack_state *stack) 1351 { 1352 return stack->slot_type[0] == STACK_SPILL && 1353 stack->spilled_ptr.type == SCALAR_VALUE; 1354 } 1355 1356 /* Mark stack slot as STACK_MISC, unless it is already STACK_INVALID, in which 1357 * case they are equivalent, or it's STACK_ZERO, in which case we preserve 1358 * more precise STACK_ZERO. 1359 * Regardless of allow_ptr_leaks setting (i.e., privileged or unprivileged 1360 * mode), we won't promote STACK_INVALID to STACK_MISC. In privileged case it is 1361 * unnecessary as both are considered equivalent when loading data and pruning, 1362 * in case of unprivileged mode it will be incorrect to allow reads of invalid 1363 * slots. 1364 */ 1365 static void mark_stack_slot_misc(struct bpf_verifier_env *env, u8 *stype) 1366 { 1367 if (*stype == STACK_ZERO) 1368 return; 1369 if (*stype == STACK_INVALID) 1370 return; 1371 *stype = STACK_MISC; 1372 } 1373 1374 static void scrub_spilled_slot(u8 *stype) 1375 { 1376 if (*stype != STACK_INVALID) 1377 *stype = STACK_MISC; 1378 } 1379 1380 /* copy array src of length n * size bytes to dst. dst is reallocated if it's too 1381 * small to hold src. This is different from krealloc since we don't want to preserve 1382 * the contents of dst. 1383 * 1384 * Leaves dst untouched if src is NULL or length is zero. Returns NULL if memory could 1385 * not be allocated. 
*/
static void *copy_array(void *dst, const void *src, size_t n, size_t size, gfp_t flags)
{
	size_t alloc_bytes;
	void *orig = dst;
	size_t bytes;

	/* nothing to copy: hand dst back unchanged */
	if (ZERO_OR_NULL_PTR(src))
		goto out;

	/* NOTE(review): unlike the krealloc failure path below, this returns
	 * NULL without freeing dst; a caller that overwrites its pointer with
	 * the result would lose the old buffer - confirm this is intended.
	 */
	if (unlikely(check_mul_overflow(n, size, &bytes)))
		return NULL;

	/* never ask for less than what orig already provides */
	alloc_bytes = max(ksize(orig), kmalloc_size_roundup(bytes));
	dst = krealloc(orig, alloc_bytes, flags);
	if (!dst) {
		/* krealloc failed, so orig is still allocated; free it here
		 * because NULL is all the caller gets back
		 */
		kfree(orig);
		return NULL;
	}

	memcpy(dst, src, bytes);
out:
	/* a NULL dst is reported as ZERO_SIZE_PTR so that NULL means failure */
	return dst ? dst : ZERO_SIZE_PTR;
}

/* resize an array from old_n items to new_n items. the array is reallocated if it's too
 * small to hold new_n items. new items are zeroed out if the array grows.
 *
 * Contrary to krealloc_array, does not free arr if new_n is zero.
 *
 * Returns the (possibly moved) array, ZERO_SIZE_PTR instead of a NULL arr, or
 * NULL if reallocation failed, in which case arr has been freed.
 */
static void *realloc_array(void *arr, size_t old_n, size_t new_n, size_t size)
{
	size_t alloc_size;
	void *new_arr;

	/* shrinking to zero or keeping the size needs no reallocation */
	if (!new_n || old_n == new_n)
		goto out;

	alloc_size = kmalloc_size_roundup(size_mul(new_n, size));
	new_arr = krealloc(arr, alloc_size, GFP_KERNEL_ACCOUNT);
	if (!new_arr) {
		kfree(arr);
		return NULL;
	}
	arr = new_arr;

	/* zero only the newly added tail */
	if (new_n > old_n)
		memset(arr + old_n * size, 0, (new_n - old_n) * size);

out:
	return arr ?
arr : ZERO_SIZE_PTR; 1437 } 1438 1439 static int copy_reference_state(struct bpf_verifier_state *dst, const struct bpf_verifier_state *src) 1440 { 1441 dst->refs = copy_array(dst->refs, src->refs, src->acquired_refs, 1442 sizeof(struct bpf_reference_state), GFP_KERNEL_ACCOUNT); 1443 if (!dst->refs) 1444 return -ENOMEM; 1445 1446 dst->acquired_refs = src->acquired_refs; 1447 dst->active_locks = src->active_locks; 1448 dst->active_preempt_locks = src->active_preempt_locks; 1449 dst->active_rcu_locks = src->active_rcu_locks; 1450 dst->active_irq_id = src->active_irq_id; 1451 dst->active_lock_id = src->active_lock_id; 1452 dst->active_lock_ptr = src->active_lock_ptr; 1453 return 0; 1454 } 1455 1456 static int copy_stack_state(struct bpf_func_state *dst, const struct bpf_func_state *src) 1457 { 1458 size_t n = src->allocated_stack / BPF_REG_SIZE; 1459 1460 dst->stack = copy_array(dst->stack, src->stack, n, sizeof(struct bpf_stack_state), 1461 GFP_KERNEL_ACCOUNT); 1462 if (!dst->stack) 1463 return -ENOMEM; 1464 1465 dst->allocated_stack = src->allocated_stack; 1466 return 0; 1467 } 1468 1469 static int resize_reference_state(struct bpf_verifier_state *state, size_t n) 1470 { 1471 state->refs = realloc_array(state->refs, state->acquired_refs, n, 1472 sizeof(struct bpf_reference_state)); 1473 if (!state->refs) 1474 return -ENOMEM; 1475 1476 state->acquired_refs = n; 1477 return 0; 1478 } 1479 1480 /* Possibly update state->allocated_stack to be at least size bytes. Also 1481 * possibly update the function's high-water mark in its bpf_subprog_info. 1482 */ 1483 static int grow_stack_state(struct bpf_verifier_env *env, struct bpf_func_state *state, int size) 1484 { 1485 size_t old_n = state->allocated_stack / BPF_REG_SIZE, n; 1486 1487 /* The stack size is always a multiple of BPF_REG_SIZE. 
*/
	size = round_up(size, BPF_REG_SIZE);
	n = size / BPF_REG_SIZE;

	/* already large enough, nothing to grow */
	if (old_n >= n)
		return 0;

	state->stack = realloc_array(state->stack, old_n, n, sizeof(struct bpf_stack_state));
	if (!state->stack)
		return -ENOMEM;

	state->allocated_stack = size;

	/* update known max for given subprogram */
	if (env->subprog_info[state->subprogno].stack_depth < size)
		env->subprog_info[state->subprogno].stack_depth = size;

	return 0;
}

/* Append one entry to the refs array of the current verifier state and
 * record @insn_idx in it. The caller fills in the remaining fields.
 * On success, returns a pointer to the new entry.
 * On failure (the array could not be grown), returns NULL.
 */
static struct bpf_reference_state *acquire_reference_state(struct bpf_verifier_env *env, int insn_idx)
{
	struct bpf_verifier_state *state = env->cur_state;
	int new_ofs = state->acquired_refs;
	int err;

	err = resize_reference_state(state, state->acquired_refs + 1);
	if (err)
		return NULL;
	state->refs[new_ofs].insn_idx = insn_idx;

	return &state->refs[new_ofs];
}

/* Acquire a REF_TYPE_PTR reference with a freshly generated id.
 * Returns the new id, or -ENOMEM if no reference entry could be added.
 */
static int acquire_reference(struct bpf_verifier_env *env, int insn_idx)
{
	struct bpf_reference_state *s;

	s = acquire_reference_state(env, insn_idx);
	if (!s)
		return -ENOMEM;
	s->type = REF_TYPE_PTR;
	s->id = ++env->id_gen;
	return s->id;
}

/* Record a held lock of the given @type identified by (@id, @ptr), bump
 * active_locks and make it the active lock of the current state.
 * Returns 0 on success, -ENOMEM if no reference entry could be added.
 */
static int acquire_lock_state(struct bpf_verifier_env *env, int insn_idx, enum ref_state_type type,
			      int id, void *ptr)
{
	struct bpf_verifier_state *state = env->cur_state;
	struct bpf_reference_state *s;

	s = acquire_reference_state(env, insn_idx);
	if (!s)
		return -ENOMEM;
	s->type = type;
	s->id = id;
	s->ptr = ptr;

	state->active_locks++;
	state->active_lock_id = id;
	state->active_lock_ptr = ptr;
	return 0;
}

static int
acquire_irq_state(struct bpf_verifier_env *env, int insn_idx)
{
	struct bpf_verifier_state *state = env->cur_state;
	struct bpf_reference_state *s;

	/* Adds a REF_TYPE_IRQ entry with a freshly generated id and makes it
	 * the active irq id; returns that id or -ENOMEM.
	 */
	s = acquire_reference_state(env, insn_idx);
	if (!s)
		return -ENOMEM;
	s->type = REF_TYPE_IRQ;
	s->id = ++env->id_gen;

	state->active_irq_id = s->id;
	return s->id;
}

/* Remove the entry at index @idx from state->refs, keeping the relative order
 * of the remaining entries, and decrement acquired_refs.
 */
static void release_reference_state(struct bpf_verifier_state *state, int idx)
{
	int last_idx;
	size_t rem;

	/* IRQ state requires the relative ordering of elements remaining the
	 * same, since it relies on the refs array to behave as a stack, so that
	 * it can detect out-of-order IRQ restore. Hence use memmove to shift
	 * the array instead of swapping the final element into the deleted idx.
	 */
	last_idx = state->acquired_refs - 1;
	rem = state->acquired_refs - idx - 1;
	if (last_idx && idx != last_idx)
		memmove(&state->refs[idx], &state->refs[idx + 1], sizeof(*state->refs) * rem);
	/* clear the now unused last entry */
	memset(&state->refs[last_idx], 0, sizeof(*state->refs));
	state->acquired_refs--;
	return;
}

/* Return true if any entry in state->refs has id @ptr_id. */
static bool find_reference_state(struct bpf_verifier_state *state, int ptr_id)
{
	int i;

	for (i = 0; i < state->acquired_refs; i++)
		if (state->refs[i].id == ptr_id)
			return true;

	return false;
}

/* Release the lock entry matching (@type, @id, @ptr). The active lock becomes
 * the last lock entry seen before the released one, or (0, NULL) if there was
 * none.
 * Returns 0 on success, -EINVAL if no matching entry exists.
 */
static int release_lock_state(struct bpf_verifier_state *state, int type, int id, void *ptr)
{
	void *prev_ptr = NULL;
	u32 prev_id = 0;
	int i;

	for (i = 0; i < state->acquired_refs; i++) {
		if (state->refs[i].type == type && state->refs[i].id == id &&
		    state->refs[i].ptr == ptr) {
			release_reference_state(state, i);
			state->active_locks--;
			/* Reassign active lock (id, ptr).
*/ 1614 state->active_lock_id = prev_id; 1615 state->active_lock_ptr = prev_ptr; 1616 return 0; 1617 } 1618 if (state->refs[i].type & REF_TYPE_LOCK_MASK) { 1619 prev_id = state->refs[i].id; 1620 prev_ptr = state->refs[i].ptr; 1621 } 1622 } 1623 return -EINVAL; 1624 } 1625 1626 static int release_irq_state(struct bpf_verifier_state *state, int id) 1627 { 1628 u32 prev_id = 0; 1629 int i; 1630 1631 if (id != state->active_irq_id) 1632 return -EACCES; 1633 1634 for (i = 0; i < state->acquired_refs; i++) { 1635 if (state->refs[i].type != REF_TYPE_IRQ) 1636 continue; 1637 if (state->refs[i].id == id) { 1638 release_reference_state(state, i); 1639 state->active_irq_id = prev_id; 1640 return 0; 1641 } else { 1642 prev_id = state->refs[i].id; 1643 } 1644 } 1645 return -EINVAL; 1646 } 1647 1648 static struct bpf_reference_state *find_lock_state(struct bpf_verifier_state *state, enum ref_state_type type, 1649 int id, void *ptr) 1650 { 1651 int i; 1652 1653 for (i = 0; i < state->acquired_refs; i++) { 1654 struct bpf_reference_state *s = &state->refs[i]; 1655 1656 if (!(s->type & type)) 1657 continue; 1658 1659 if (s->id == id && s->ptr == ptr) 1660 return s; 1661 } 1662 return NULL; 1663 } 1664 1665 static void update_peak_states(struct bpf_verifier_env *env) 1666 { 1667 u32 cur_states; 1668 1669 cur_states = env->explored_states_size + env->free_list_size + env->num_backedges; 1670 env->peak_states = max(env->peak_states, cur_states); 1671 } 1672 1673 static void free_func_state(struct bpf_func_state *state) 1674 { 1675 if (!state) 1676 return; 1677 kfree(state->stack); 1678 kfree(state); 1679 } 1680 1681 static void clear_jmp_history(struct bpf_verifier_state *state) 1682 { 1683 kfree(state->jmp_history); 1684 state->jmp_history = NULL; 1685 state->jmp_history_cnt = 0; 1686 } 1687 1688 static void free_verifier_state(struct bpf_verifier_state *state, 1689 bool free_self) 1690 { 1691 int i; 1692 1693 for (i = 0; i <= state->curframe; i++) { 1694 
free_func_state(state->frame[i]);
		state->frame[i] = NULL;
	}
	kfree(state->refs);
	clear_jmp_history(state);
	/* the state itself is freed only when the caller asks for it */
	if (free_self)
		kfree(state);
}

/* struct bpf_verifier_state->parent refers to states
 * that are in either of env->{explored_states,free_list}.
 * In both cases the state is contained in struct bpf_verifier_state_list.
 *
 * Returns the list entry containing st->parent, or NULL if @st has no parent.
 */
static struct bpf_verifier_state_list *state_parent_as_list(struct bpf_verifier_state *st)
{
	if (st->parent)
		return container_of(st->parent, struct bpf_verifier_state_list, state);
	return NULL;
}

static bool incomplete_read_marks(struct bpf_verifier_env *env,
				  struct bpf_verifier_state *st);

/* A state can be freed if it is no longer referenced:
 * - is in the env->free_list;
 * - has no children states;
 * In addition the state is kept while incomplete_read_marks() reports true
 * for it.
 */
static void maybe_free_verifier_state(struct bpf_verifier_env *env,
				      struct bpf_verifier_state_list *sl)
{
	if (!sl->in_free_list
	    || sl->state.branches != 0
	    || incomplete_read_marks(env, &sl->state))
		return;
	list_del(&sl->node);
	/* the state is embedded in sl, so it is released by kfree(sl) below */
	free_verifier_state(&sl->state, false);
	kfree(sl);
	env->free_list_size--;
}

/* copy verifier state from src to dst growing dst stack space
 * when necessary to accommodate larger src stack
 */
static int copy_func_state(struct bpf_func_state *dst,
			   const struct bpf_func_state *src)
{
	/* copy every member laid out before 'stack'; the stack is copied separately */
	memcpy(dst, src, offsetof(struct bpf_func_state, stack));
	return copy_stack_state(dst, src);
}

/* Make @dst_state a copy of @src: jump history, reference state, scalar
 * members and every frame up to src->curframe. Frames missing in @dst_state
 * are allocated, surplus ones are freed.
 *
 * Returns 0 on success or a negative errno (-ENOMEM) on allocation failure.
 */
static int copy_verifier_state(struct bpf_verifier_state *dst_state,
			       const struct bpf_verifier_state *src)
{
	struct bpf_func_state *dst;
	int i, err;

	dst_state->jmp_history = copy_array(dst_state->jmp_history, src->jmp_history,
					  src->jmp_history_cnt, sizeof(*dst_state->jmp_history),
					  GFP_KERNEL_ACCOUNT);
	if (!dst_state->jmp_history)
		return -ENOMEM;
dst_state->jmp_history_cnt = src->jmp_history_cnt;

	/* if dst has more stack frames than src, free them, this is also
	 * necessary in case of exceptional exits using bpf_throw.
	 */
	for (i = src->curframe + 1; i <= dst_state->curframe; i++) {
		free_func_state(dst_state->frame[i]);
		dst_state->frame[i] = NULL;
	}
	err = copy_reference_state(dst_state, src);
	if (err)
		return err;
	dst_state->speculative = src->speculative;
	dst_state->in_sleepable = src->in_sleepable;
	dst_state->cleaned = src->cleaned;
	dst_state->curframe = src->curframe;
	dst_state->branches = src->branches;
	dst_state->parent = src->parent;
	dst_state->first_insn_idx = src->first_insn_idx;
	dst_state->last_insn_idx = src->last_insn_idx;
	dst_state->dfs_depth = src->dfs_depth;
	dst_state->callback_unroll_depth = src->callback_unroll_depth;
	dst_state->may_goto_depth = src->may_goto_depth;
	dst_state->equal_state = src->equal_state;
	for (i = 0; i <= src->curframe; i++) {
		dst = dst_state->frame[i];
		/* allocate frames that dst does not have yet */
		if (!dst) {
			dst = kzalloc_obj(*dst, GFP_KERNEL_ACCOUNT);
			if (!dst)
				return -ENOMEM;
			dst_state->frame[i] = dst;
		}
		err = copy_func_state(dst, src->frame[i]);
		if (err)
			return err;
	}
	return 0;
}

/* Number of buckets in env->explored_states: one per program instruction. */
static u32 state_htab_size(struct bpf_verifier_env *env)
{
	return env->prog->len;
}

/* Return the explored_states bucket for instruction @idx. The bucket index
 * mixes @idx with the callsite of the current frame.
 */
static struct list_head *explored_state(struct bpf_verifier_env *env, int idx)
{
	struct bpf_verifier_state *cur = env->cur_state;
	struct bpf_func_state *state = cur->frame[cur->curframe];

	return &env->explored_states[(idx ^ state->callsite) % state_htab_size(env)];
}

/* Return true if @a and @b have the same frame depth and identical callsites
 * in every frame.
 */
static bool same_callsites(struct bpf_verifier_state *a, struct bpf_verifier_state *b)
{
	int fr;

	if (a->curframe != b->curframe)
		return false;

	for (fr = a->curframe; fr >= 0; fr--)
		if
(a->frame[fr]->callsite != b->frame[fr]->callsite)
			return false;

	return true;
}

/* Return IP for a given frame in a call stack: st->insn_idx for the current
 * frame, otherwise the callsite recorded in the next deeper frame.
 */
static u32 frame_insn_idx(struct bpf_verifier_state *st, u32 frame)
{
	return frame == st->curframe
	       ? st->insn_idx
	       : st->frame[frame + 1]->callsite;
}

/* For state @st look for a topmost frame with frame_insn_idx() in some SCC,
 * if such frame exists form a corresponding @callchain as an array of
 * call sites leading to this frame and SCC id.
 * E.g.:
 *
 *    void foo()  { A: loop {... SCC#1 ...}; }
 *    void bar()  { B: loop { C: foo(); ... SCC#2 ... }
 *                  D: loop { E: foo(); ... SCC#3 ... } }
 *    void main() { F: bar(); }
 *
 * @callchain at (A) would be either (F,SCC#2) or (F,SCC#3) depending
 * on @st frame call sites being (F,C,A) or (F,E,A).
 *
 * Returns true if such a frame was found and @callchain was filled in,
 * false if no frame of @st is inside an SCC.
 */
static bool compute_scc_callchain(struct bpf_verifier_env *env,
				  struct bpf_verifier_state *st,
				  struct bpf_scc_callchain *callchain)
{
	u32 i, scc, insn_idx;

	memset(callchain, 0, sizeof(*callchain));
	/* walk frames from the outermost (0) towards the current one */
	for (i = 0; i <= st->curframe; i++) {
		insn_idx = frame_insn_idx(st, i);
		scc = env->insn_aux_data[insn_idx].scc;
		if (scc) {
			callchain->scc = scc;
			break;
		} else if (i < st->curframe) {
			callchain->callsites[i] = insn_idx;
		} else {
			/* reached the current frame without finding an SCC */
			return false;
		}
	}
	return true;
}

/* Check if bpf_scc_visit instance for @callchain exists.
*/ 1865 static struct bpf_scc_visit *scc_visit_lookup(struct bpf_verifier_env *env, 1866 struct bpf_scc_callchain *callchain) 1867 { 1868 struct bpf_scc_info *info = env->scc_info[callchain->scc]; 1869 struct bpf_scc_visit *visits = info->visits; 1870 u32 i; 1871 1872 if (!info) 1873 return NULL; 1874 for (i = 0; i < info->num_visits; i++) 1875 if (memcmp(callchain, &visits[i].callchain, sizeof(*callchain)) == 0) 1876 return &visits[i]; 1877 return NULL; 1878 } 1879 1880 /* Allocate a new bpf_scc_visit instance corresponding to @callchain. 1881 * Allocated instances are alive for a duration of the do_check_common() 1882 * call and are freed by free_states(). 1883 */ 1884 static struct bpf_scc_visit *scc_visit_alloc(struct bpf_verifier_env *env, 1885 struct bpf_scc_callchain *callchain) 1886 { 1887 struct bpf_scc_visit *visit; 1888 struct bpf_scc_info *info; 1889 u32 scc, num_visits; 1890 u64 new_sz; 1891 1892 scc = callchain->scc; 1893 info = env->scc_info[scc]; 1894 num_visits = info ? 
info->num_visits : 0; 1895 new_sz = sizeof(*info) + sizeof(struct bpf_scc_visit) * (num_visits + 1); 1896 info = kvrealloc(env->scc_info[scc], new_sz, GFP_KERNEL_ACCOUNT); 1897 if (!info) 1898 return NULL; 1899 env->scc_info[scc] = info; 1900 info->num_visits = num_visits + 1; 1901 visit = &info->visits[num_visits]; 1902 memset(visit, 0, sizeof(*visit)); 1903 memcpy(&visit->callchain, callchain, sizeof(*callchain)); 1904 return visit; 1905 } 1906 1907 /* Form a string '(callsite#1,callsite#2,...,scc)' in env->tmp_str_buf */ 1908 static char *format_callchain(struct bpf_verifier_env *env, struct bpf_scc_callchain *callchain) 1909 { 1910 char *buf = env->tmp_str_buf; 1911 int i, delta = 0; 1912 1913 delta += snprintf(buf + delta, TMP_STR_BUF_LEN - delta, "("); 1914 for (i = 0; i < ARRAY_SIZE(callchain->callsites); i++) { 1915 if (!callchain->callsites[i]) 1916 break; 1917 delta += snprintf(buf + delta, TMP_STR_BUF_LEN - delta, "%u,", 1918 callchain->callsites[i]); 1919 } 1920 delta += snprintf(buf + delta, TMP_STR_BUF_LEN - delta, "%u)", callchain->scc); 1921 return env->tmp_str_buf; 1922 } 1923 1924 /* If callchain for @st exists (@st is in some SCC), ensure that 1925 * bpf_scc_visit instance for this callchain exists. 1926 * If instance does not exist or is empty, assign visit->entry_state to @st. 
*/
static int maybe_enter_scc(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
{
	struct bpf_scc_callchain *callchain = &env->callchain_buf;
	struct bpf_scc_visit *visit;

	/* @st is not inside any SCC: nothing to track */
	if (!compute_scc_callchain(env, st, callchain))
		return 0;
	visit = scc_visit_lookup(env, callchain);
	/* allocate the visit instance on first use */
	visit = visit ?: scc_visit_alloc(env, callchain);
	if (!visit)
		return -ENOMEM;
	if (!visit->entry_state) {
		visit->entry_state = st;
		if (env->log.level & BPF_LOG_LEVEL2)
			verbose(env, "SCC enter %s\n", format_callchain(env, callchain));
	}
	return 0;
}

static int propagate_backedges(struct bpf_verifier_env *env, struct bpf_scc_visit *visit);

/* If callchain for @st exists (@st is in some SCC), make it empty:
 * - set visit->entry_state to NULL;
 * - flush accumulated backedges.
 *
 * Nothing is done unless @st is the entry state of its visit instance.
 * Returns 0, the result of propagate_backedges(), or -EFAULT if a
 * non-speculative state has no visit instance.
 */
static int maybe_exit_scc(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
{
	struct bpf_scc_callchain *callchain = &env->callchain_buf;
	struct bpf_scc_visit *visit;

	if (!compute_scc_callchain(env, st, callchain))
		return 0;
	visit = scc_visit_lookup(env, callchain);
	if (!visit) {
		/*
		 * If path traversal stops inside an SCC, corresponding bpf_scc_visit
		 * must exist for non-speculative paths. For non-speculative paths
		 * traversal stops when:
		 * a. Verification error is found, maybe_exit_scc() is not called.
		 * b. Top level BPF_EXIT is reached. Top level BPF_EXIT is not a member
		 *    of any SCC.
		 * c. A checkpoint is reached and matched. Checkpoints are created by
		 *    is_state_visited(), which calls maybe_enter_scc(), which allocates
		 *    bpf_scc_visit instances for checkpoints within SCCs.
		 * (c) is the only case that can reach this point.
*/
		if (!st->speculative) {
			verifier_bug(env, "scc exit: no visit info for call chain %s",
				     format_callchain(env, callchain));
			return -EFAULT;
		}
		return 0;
	}
	/* only the state that entered the SCC closes the visit */
	if (visit->entry_state != st)
		return 0;
	if (env->log.level & BPF_LOG_LEVEL2)
		verbose(env, "SCC exit %s\n", format_callchain(env, callchain));
	visit->entry_state = NULL;
	/* these backedges no longer count towards the global total */
	env->num_backedges -= visit->num_backedges;
	visit->num_backedges = 0;
	update_peak_states(env);
	return propagate_backedges(env, visit);
}

/* Look up a bpf_scc_visit instance corresponding to @st callchain
 * and add @backedge to visit->backedges. @st callchain must exist.
 *
 * Returns 0 on success, -EFAULT if @st is not inside an SCC or no visit
 * instance exists for its callchain.
 */
static int add_scc_backedge(struct bpf_verifier_env *env,
			    struct bpf_verifier_state *st,
			    struct bpf_scc_backedge *backedge)
{
	struct bpf_scc_callchain *callchain = &env->callchain_buf;
	struct bpf_scc_visit *visit;

	if (!compute_scc_callchain(env, st, callchain)) {
		verifier_bug(env, "add backedge: no SCC in verification path, insn_idx %d",
			     st->insn_idx);
		return -EFAULT;
	}
	visit = scc_visit_lookup(env, callchain);
	if (!visit) {
		verifier_bug(env, "add backedge: no visit info for call chain %s",
			     format_callchain(env, callchain));
		return -EFAULT;
	}
	if (env->log.level & BPF_LOG_LEVEL2)
		verbose(env, "SCC backedge %s\n", format_callchain(env, callchain));
	/* push onto the head of the singly linked backedge list */
	backedge->next = visit->backedges;
	visit->backedges = backedge;
	visit->num_backedges++;
	env->num_backedges++;
	update_peak_states(env);
	return 0;
}

/* bpf_reg_state->live marks for registers in a state @st are incomplete,
 * if state @st is in some SCC and not all execution paths starting at this
 * SCC are fully explored.
2026 */ 2027 static bool incomplete_read_marks(struct bpf_verifier_env *env, 2028 struct bpf_verifier_state *st) 2029 { 2030 struct bpf_scc_callchain *callchain = &env->callchain_buf; 2031 struct bpf_scc_visit *visit; 2032 2033 if (!compute_scc_callchain(env, st, callchain)) 2034 return false; 2035 visit = scc_visit_lookup(env, callchain); 2036 if (!visit) 2037 return false; 2038 return !!visit->backedges; 2039 } 2040 2041 static void free_backedges(struct bpf_scc_visit *visit) 2042 { 2043 struct bpf_scc_backedge *backedge, *next; 2044 2045 for (backedge = visit->backedges; backedge; backedge = next) { 2046 free_verifier_state(&backedge->state, false); 2047 next = backedge->next; 2048 kfree(backedge); 2049 } 2050 visit->backedges = NULL; 2051 } 2052 2053 static int update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st) 2054 { 2055 struct bpf_verifier_state_list *sl = NULL, *parent_sl; 2056 struct bpf_verifier_state *parent; 2057 int err; 2058 2059 while (st) { 2060 u32 br = --st->branches; 2061 2062 /* verifier_bug_if(br > 1, ...) 
technically makes sense here, 2063 * but see comment in push_stack(), hence: 2064 */ 2065 verifier_bug_if((int)br < 0, env, "%s:branches_to_explore=%d", __func__, br); 2066 if (br) 2067 break; 2068 err = maybe_exit_scc(env, st); 2069 if (err) 2070 return err; 2071 parent = st->parent; 2072 parent_sl = state_parent_as_list(st); 2073 if (sl) 2074 maybe_free_verifier_state(env, sl); 2075 st = parent; 2076 sl = parent_sl; 2077 } 2078 return 0; 2079 } 2080 2081 static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx, 2082 int *insn_idx, bool pop_log) 2083 { 2084 struct bpf_verifier_state *cur = env->cur_state; 2085 struct bpf_verifier_stack_elem *elem, *head = env->head; 2086 int err; 2087 2088 if (env->head == NULL) 2089 return -ENOENT; 2090 2091 if (cur) { 2092 err = copy_verifier_state(cur, &head->st); 2093 if (err) 2094 return err; 2095 } 2096 if (pop_log) 2097 bpf_vlog_reset(&env->log, head->log_pos); 2098 if (insn_idx) 2099 *insn_idx = head->insn_idx; 2100 if (prev_insn_idx) 2101 *prev_insn_idx = head->prev_insn_idx; 2102 elem = head->next; 2103 free_verifier_state(&head->st, false); 2104 kfree(head); 2105 env->head = elem; 2106 env->stack_size--; 2107 return 0; 2108 } 2109 2110 static bool error_recoverable_with_nospec(int err) 2111 { 2112 /* Should only return true for non-fatal errors that are allowed to 2113 * occur during speculative verification. For these we can insert a 2114 * nospec and the program might still be accepted. Do not include 2115 * something like ENOMEM because it is likely to re-occur for the next 2116 * architectural path once it has been recovered-from in all speculative 2117 * paths. 
2118 */ 2119 return err == -EPERM || err == -EACCES || err == -EINVAL; 2120 } 2121 2122 static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env, 2123 int insn_idx, int prev_insn_idx, 2124 bool speculative) 2125 { 2126 struct bpf_verifier_state *cur = env->cur_state; 2127 struct bpf_verifier_stack_elem *elem; 2128 int err; 2129 2130 elem = kzalloc_obj(struct bpf_verifier_stack_elem, GFP_KERNEL_ACCOUNT); 2131 if (!elem) 2132 return ERR_PTR(-ENOMEM); 2133 2134 elem->insn_idx = insn_idx; 2135 elem->prev_insn_idx = prev_insn_idx; 2136 elem->next = env->head; 2137 elem->log_pos = env->log.end_pos; 2138 env->head = elem; 2139 env->stack_size++; 2140 err = copy_verifier_state(&elem->st, cur); 2141 if (err) 2142 return ERR_PTR(-ENOMEM); 2143 elem->st.speculative |= speculative; 2144 if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) { 2145 verbose(env, "The sequence of %d jumps is too complex.\n", 2146 env->stack_size); 2147 return ERR_PTR(-E2BIG); 2148 } 2149 if (elem->st.parent) { 2150 ++elem->st.parent->branches; 2151 /* WARN_ON(branches > 2) technically makes sense here, 2152 * but 2153 * 1. speculative states will bump 'branches' for non-branch 2154 * instructions 2155 * 2. is_state_visited() heuristics may decide not to create 2156 * a new state for a sequence of branches and all such current 2157 * and cloned states will be pointing to a single parent state 2158 * which might have large 'branches' count. 
2159 */ 2160 } 2161 return &elem->st; 2162 } 2163 2164 #define CALLER_SAVED_REGS 6 2165 static const int caller_saved[CALLER_SAVED_REGS] = { 2166 BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5 2167 }; 2168 2169 /* This helper doesn't clear reg->id */ 2170 static void ___mark_reg_known(struct bpf_reg_state *reg, u64 imm) 2171 { 2172 reg->var_off = tnum_const(imm); 2173 reg->smin_value = (s64)imm; 2174 reg->smax_value = (s64)imm; 2175 reg->umin_value = imm; 2176 reg->umax_value = imm; 2177 2178 reg->s32_min_value = (s32)imm; 2179 reg->s32_max_value = (s32)imm; 2180 reg->u32_min_value = (u32)imm; 2181 reg->u32_max_value = (u32)imm; 2182 } 2183 2184 /* Mark the unknown part of a register (variable offset or scalar value) as 2185 * known to have the value @imm. 2186 */ 2187 static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm) 2188 { 2189 /* Clear off and union(map_ptr, range) */ 2190 memset(((u8 *)reg) + sizeof(reg->type), 0, 2191 offsetof(struct bpf_reg_state, var_off) - sizeof(reg->type)); 2192 reg->id = 0; 2193 reg->ref_obj_id = 0; 2194 ___mark_reg_known(reg, imm); 2195 } 2196 2197 static void __mark_reg32_known(struct bpf_reg_state *reg, u64 imm) 2198 { 2199 reg->var_off = tnum_const_subreg(reg->var_off, imm); 2200 reg->s32_min_value = (s32)imm; 2201 reg->s32_max_value = (s32)imm; 2202 reg->u32_min_value = (u32)imm; 2203 reg->u32_max_value = (u32)imm; 2204 } 2205 2206 /* Mark the 'variable offset' part of a register as zero. This should be 2207 * used only on registers holding a pointer type. 
2208 */ 2209 static void __mark_reg_known_zero(struct bpf_reg_state *reg) 2210 { 2211 __mark_reg_known(reg, 0); 2212 } 2213 2214 static void __mark_reg_const_zero(const struct bpf_verifier_env *env, struct bpf_reg_state *reg) 2215 { 2216 __mark_reg_known(reg, 0); 2217 reg->type = SCALAR_VALUE; 2218 /* all scalars are assumed imprecise initially (unless unprivileged, 2219 * in which case everything is forced to be precise) 2220 */ 2221 reg->precise = !env->bpf_capable; 2222 } 2223 2224 static void mark_reg_known_zero(struct bpf_verifier_env *env, 2225 struct bpf_reg_state *regs, u32 regno) 2226 { 2227 if (WARN_ON(regno >= MAX_BPF_REG)) { 2228 verbose(env, "mark_reg_known_zero(regs, %u)\n", regno); 2229 /* Something bad happened, let's kill all regs */ 2230 for (regno = 0; regno < MAX_BPF_REG; regno++) 2231 __mark_reg_not_init(env, regs + regno); 2232 return; 2233 } 2234 __mark_reg_known_zero(regs + regno); 2235 } 2236 2237 static void __mark_dynptr_reg(struct bpf_reg_state *reg, enum bpf_dynptr_type type, 2238 bool first_slot, int dynptr_id) 2239 { 2240 /* reg->type has no meaning for STACK_DYNPTR, but when we set reg for 2241 * callback arguments, it does need to be CONST_PTR_TO_DYNPTR, so simply 2242 * set it unconditionally as it is ignored for STACK_DYNPTR anyway. 2243 */ 2244 __mark_reg_known_zero(reg); 2245 reg->type = CONST_PTR_TO_DYNPTR; 2246 /* Give each dynptr a unique id to uniquely associate slices to it. */ 2247 reg->id = dynptr_id; 2248 reg->dynptr.type = type; 2249 reg->dynptr.first_slot = first_slot; 2250 } 2251 2252 static void mark_ptr_not_null_reg(struct bpf_reg_state *reg) 2253 { 2254 if (base_type(reg->type) == PTR_TO_MAP_VALUE) { 2255 const struct bpf_map *map = reg->map_ptr; 2256 2257 if (map->inner_map_meta) { 2258 reg->type = CONST_PTR_TO_MAP; 2259 reg->map_ptr = map->inner_map_meta; 2260 /* transfer reg's id which is unique for every map_lookup_elem 2261 * as UID of the inner map. 
2262 */ 2263 if (btf_record_has_field(map->inner_map_meta->record, 2264 BPF_TIMER | BPF_WORKQUEUE | BPF_TASK_WORK)) { 2265 reg->map_uid = reg->id; 2266 } 2267 } else if (map->map_type == BPF_MAP_TYPE_XSKMAP) { 2268 reg->type = PTR_TO_XDP_SOCK; 2269 } else if (map->map_type == BPF_MAP_TYPE_SOCKMAP || 2270 map->map_type == BPF_MAP_TYPE_SOCKHASH) { 2271 reg->type = PTR_TO_SOCKET; 2272 } else { 2273 reg->type = PTR_TO_MAP_VALUE; 2274 } 2275 return; 2276 } 2277 2278 reg->type &= ~PTR_MAYBE_NULL; 2279 } 2280 2281 static void mark_reg_graph_node(struct bpf_reg_state *regs, u32 regno, 2282 struct btf_field_graph_root *ds_head) 2283 { 2284 __mark_reg_known_zero(®s[regno]); 2285 regs[regno].type = PTR_TO_BTF_ID | MEM_ALLOC; 2286 regs[regno].btf = ds_head->btf; 2287 regs[regno].btf_id = ds_head->value_btf_id; 2288 regs[regno].off = ds_head->node_offset; 2289 } 2290 2291 static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg) 2292 { 2293 return type_is_pkt_pointer(reg->type); 2294 } 2295 2296 static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg) 2297 { 2298 return reg_is_pkt_pointer(reg) || 2299 reg->type == PTR_TO_PACKET_END; 2300 } 2301 2302 static bool reg_is_dynptr_slice_pkt(const struct bpf_reg_state *reg) 2303 { 2304 return base_type(reg->type) == PTR_TO_MEM && 2305 (reg->type & 2306 (DYNPTR_TYPE_SKB | DYNPTR_TYPE_XDP | DYNPTR_TYPE_SKB_META)); 2307 } 2308 2309 /* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. */ 2310 static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg, 2311 enum bpf_reg_type which) 2312 { 2313 /* The register can already have a range from prior markings. 2314 * This is fine as long as it hasn't been advanced from its 2315 * origin. 
2316 */ 2317 return reg->type == which && 2318 reg->id == 0 && 2319 reg->off == 0 && 2320 tnum_equals_const(reg->var_off, 0); 2321 } 2322 2323 /* Reset the min/max bounds of a register */ 2324 static void __mark_reg_unbounded(struct bpf_reg_state *reg) 2325 { 2326 reg->smin_value = S64_MIN; 2327 reg->smax_value = S64_MAX; 2328 reg->umin_value = 0; 2329 reg->umax_value = U64_MAX; 2330 2331 reg->s32_min_value = S32_MIN; 2332 reg->s32_max_value = S32_MAX; 2333 reg->u32_min_value = 0; 2334 reg->u32_max_value = U32_MAX; 2335 } 2336 2337 static void __mark_reg64_unbounded(struct bpf_reg_state *reg) 2338 { 2339 reg->smin_value = S64_MIN; 2340 reg->smax_value = S64_MAX; 2341 reg->umin_value = 0; 2342 reg->umax_value = U64_MAX; 2343 } 2344 2345 static void __mark_reg32_unbounded(struct bpf_reg_state *reg) 2346 { 2347 reg->s32_min_value = S32_MIN; 2348 reg->s32_max_value = S32_MAX; 2349 reg->u32_min_value = 0; 2350 reg->u32_max_value = U32_MAX; 2351 } 2352 2353 static void reset_reg64_and_tnum(struct bpf_reg_state *reg) 2354 { 2355 __mark_reg64_unbounded(reg); 2356 reg->var_off = tnum_unknown; 2357 } 2358 2359 static void reset_reg32_and_tnum(struct bpf_reg_state *reg) 2360 { 2361 __mark_reg32_unbounded(reg); 2362 reg->var_off = tnum_unknown; 2363 } 2364 2365 static void __update_reg32_bounds(struct bpf_reg_state *reg) 2366 { 2367 struct tnum var32_off = tnum_subreg(reg->var_off); 2368 2369 /* min signed is max(sign bit) | min(other bits) */ 2370 reg->s32_min_value = max_t(s32, reg->s32_min_value, 2371 var32_off.value | (var32_off.mask & S32_MIN)); 2372 /* max signed is min(sign bit) | max(other bits) */ 2373 reg->s32_max_value = min_t(s32, reg->s32_max_value, 2374 var32_off.value | (var32_off.mask & S32_MAX)); 2375 reg->u32_min_value = max_t(u32, reg->u32_min_value, (u32)var32_off.value); 2376 reg->u32_max_value = min(reg->u32_max_value, 2377 (u32)(var32_off.value | var32_off.mask)); 2378 } 2379 2380 static void __update_reg64_bounds(struct bpf_reg_state *reg) 2381 { 2382 
u64 tnum_next, tmax; 2383 bool umin_in_tnum; 2384 2385 /* min signed is max(sign bit) | min(other bits) */ 2386 reg->smin_value = max_t(s64, reg->smin_value, 2387 reg->var_off.value | (reg->var_off.mask & S64_MIN)); 2388 /* max signed is min(sign bit) | max(other bits) */ 2389 reg->smax_value = min_t(s64, reg->smax_value, 2390 reg->var_off.value | (reg->var_off.mask & S64_MAX)); 2391 reg->umin_value = max(reg->umin_value, reg->var_off.value); 2392 reg->umax_value = min(reg->umax_value, 2393 reg->var_off.value | reg->var_off.mask); 2394 2395 /* Check if u64 and tnum overlap in a single value */ 2396 tnum_next = tnum_step(reg->var_off, reg->umin_value); 2397 umin_in_tnum = (reg->umin_value & ~reg->var_off.mask) == reg->var_off.value; 2398 tmax = reg->var_off.value | reg->var_off.mask; 2399 if (umin_in_tnum && tnum_next > reg->umax_value) { 2400 /* The u64 range and the tnum only overlap in umin. 2401 * u64: ---[xxxxxx]----- 2402 * tnum: --xx----------x- 2403 */ 2404 ___mark_reg_known(reg, reg->umin_value); 2405 } else if (!umin_in_tnum && tnum_next == tmax) { 2406 /* The u64 range and the tnum only overlap in the maximum value 2407 * represented by the tnum, called tmax. 2408 * u64: ---[xxxxxx]----- 2409 * tnum: xx-----x-------- 2410 */ 2411 ___mark_reg_known(reg, tmax); 2412 } else if (!umin_in_tnum && tnum_next <= reg->umax_value && 2413 tnum_step(reg->var_off, tnum_next) > reg->umax_value) { 2414 /* The u64 range and the tnum only overlap in between umin 2415 * (excluded) and umax. 
2416 * u64: ---[xxxxxx]----- 2417 * tnum: xx----x-------x- 2418 */ 2419 ___mark_reg_known(reg, tnum_next); 2420 } 2421 } 2422 2423 static void __update_reg_bounds(struct bpf_reg_state *reg) 2424 { 2425 __update_reg32_bounds(reg); 2426 __update_reg64_bounds(reg); 2427 } 2428 2429 /* Uses signed min/max values to inform unsigned, and vice-versa */ 2430 static void __reg32_deduce_bounds(struct bpf_reg_state *reg) 2431 { 2432 /* If upper 32 bits of u64/s64 range don't change, we can use lower 32 2433 * bits to improve our u32/s32 boundaries. 2434 * 2435 * E.g., the case where we have upper 32 bits as zero ([10, 20] in 2436 * u64) is pretty trivial, it's obvious that in u32 we'll also have 2437 * [10, 20] range. But this property holds for any 64-bit range as 2438 * long as upper 32 bits in that entire range of values stay the same. 2439 * 2440 * E.g., u64 range [0x10000000A, 0x10000000F] ([4294967306, 4294967311] 2441 * in decimal) has the same upper 32 bits throughout all the values in 2442 * that range. As such, lower 32 bits form a valid [0xA, 0xF] ([10, 15]) 2443 * range. 2444 * 2445 * Note also, that [0xA, 0xF] is a valid range both in u32 and in s32, 2446 * following the rules outlined below about u64/s64 correspondence 2447 * (which equally applies to u32 vs s32 correspondence). In general it 2448 * depends on actual hexadecimal values of 32-bit range. They can form 2449 * only valid u32, or only valid s32 ranges in some cases. 2450 * 2451 * So we use all these insights to derive bounds for subregisters here. 
2452 */ 2453 if ((reg->umin_value >> 32) == (reg->umax_value >> 32)) { 2454 /* u64 to u32 casting preserves validity of low 32 bits as 2455 * a range, if upper 32 bits are the same 2456 */ 2457 reg->u32_min_value = max_t(u32, reg->u32_min_value, (u32)reg->umin_value); 2458 reg->u32_max_value = min_t(u32, reg->u32_max_value, (u32)reg->umax_value); 2459 2460 if ((s32)reg->umin_value <= (s32)reg->umax_value) { 2461 reg->s32_min_value = max_t(s32, reg->s32_min_value, (s32)reg->umin_value); 2462 reg->s32_max_value = min_t(s32, reg->s32_max_value, (s32)reg->umax_value); 2463 } 2464 } 2465 if ((reg->smin_value >> 32) == (reg->smax_value >> 32)) { 2466 /* low 32 bits should form a proper u32 range */ 2467 if ((u32)reg->smin_value <= (u32)reg->smax_value) { 2468 reg->u32_min_value = max_t(u32, reg->u32_min_value, (u32)reg->smin_value); 2469 reg->u32_max_value = min_t(u32, reg->u32_max_value, (u32)reg->smax_value); 2470 } 2471 /* low 32 bits should form a proper s32 range */ 2472 if ((s32)reg->smin_value <= (s32)reg->smax_value) { 2473 reg->s32_min_value = max_t(s32, reg->s32_min_value, (s32)reg->smin_value); 2474 reg->s32_max_value = min_t(s32, reg->s32_max_value, (s32)reg->smax_value); 2475 } 2476 } 2477 /* Special case where upper bits form a small sequence of two 2478 * sequential numbers (in 32-bit unsigned space, so 0xffffffff to 2479 * 0x00000000 is also valid), while lower bits form a proper s32 range 2480 * going from negative numbers to positive numbers. E.g., let's say we 2481 * have s64 range [-1, 1] ([0xffffffffffffffff, 0x0000000000000001]). 2482 * Possible s64 values are {-1, 0, 1} ({0xffffffffffffffff, 2483 * 0x0000000000000000, 0x00000000000001}). Ignoring upper 32 bits, 2484 * we still get a valid s32 range [-1, 1] ([0xffffffff, 0x00000001]). 2485 * Note that it doesn't have to be 0xffffffff going to 0x00000000 in 2486 * upper 32 bits. 
As a random example, s64 range 2487 * [0xfffffff0fffffff0; 0xfffffff100000010], forms a valid s32 range 2488 * [-16, 16] ([0xfffffff0; 0x00000010]) in its 32 bit subregister. 2489 */ 2490 if ((u32)(reg->umin_value >> 32) + 1 == (u32)(reg->umax_value >> 32) && 2491 (s32)reg->umin_value < 0 && (s32)reg->umax_value >= 0) { 2492 reg->s32_min_value = max_t(s32, reg->s32_min_value, (s32)reg->umin_value); 2493 reg->s32_max_value = min_t(s32, reg->s32_max_value, (s32)reg->umax_value); 2494 } 2495 if ((u32)(reg->smin_value >> 32) + 1 == (u32)(reg->smax_value >> 32) && 2496 (s32)reg->smin_value < 0 && (s32)reg->smax_value >= 0) { 2497 reg->s32_min_value = max_t(s32, reg->s32_min_value, (s32)reg->smin_value); 2498 reg->s32_max_value = min_t(s32, reg->s32_max_value, (s32)reg->smax_value); 2499 } 2500 /* if u32 range forms a valid s32 range (due to matching sign bit), 2501 * try to learn from that 2502 */ 2503 if ((s32)reg->u32_min_value <= (s32)reg->u32_max_value) { 2504 reg->s32_min_value = max_t(s32, reg->s32_min_value, reg->u32_min_value); 2505 reg->s32_max_value = min_t(s32, reg->s32_max_value, reg->u32_max_value); 2506 } 2507 /* If we cannot cross the sign boundary, then signed and unsigned bounds 2508 * are the same, so combine. This works even in the negative case, e.g. 2509 * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff. 2510 */ 2511 if ((u32)reg->s32_min_value <= (u32)reg->s32_max_value) { 2512 reg->u32_min_value = max_t(u32, reg->s32_min_value, reg->u32_min_value); 2513 reg->u32_max_value = min_t(u32, reg->s32_max_value, reg->u32_max_value); 2514 } else { 2515 if (reg->u32_max_value < (u32)reg->s32_min_value) { 2516 /* See __reg64_deduce_bounds() for detailed explanation. 
2517 * Refine ranges in the following situation: 2518 * 2519 * 0 U32_MAX 2520 * | [xxxxxxxxxxxxxx u32 range xxxxxxxxxxxxxx] | 2521 * |----------------------------|----------------------------| 2522 * |xxxxx s32 range xxxxxxxxx] [xxxxxxx| 2523 * 0 S32_MAX S32_MIN -1 2524 */ 2525 reg->s32_min_value = (s32)reg->u32_min_value; 2526 reg->u32_max_value = min_t(u32, reg->u32_max_value, reg->s32_max_value); 2527 } else if ((u32)reg->s32_max_value < reg->u32_min_value) { 2528 /* 2529 * 0 U32_MAX 2530 * | [xxxxxxxxxxxxxx u32 range xxxxxxxxxxxxxx] | 2531 * |----------------------------|----------------------------| 2532 * |xxxxxxxxx] [xxxxxxxxxxxx s32 range | 2533 * 0 S32_MAX S32_MIN -1 2534 */ 2535 reg->s32_max_value = (s32)reg->u32_max_value; 2536 reg->u32_min_value = max_t(u32, reg->u32_min_value, reg->s32_min_value); 2537 } 2538 } 2539 } 2540 2541 static void __reg64_deduce_bounds(struct bpf_reg_state *reg) 2542 { 2543 /* If u64 range forms a valid s64 range (due to matching sign bit), 2544 * try to learn from that. Let's do a bit of ASCII art to see when 2545 * this is happening. Let's take u64 range first: 2546 * 2547 * 0 0x7fffffffffffffff 0x8000000000000000 U64_MAX 2548 * |-------------------------------|--------------------------------| 2549 * 2550 * Valid u64 range is formed when umin and umax are anywhere in the 2551 * range [0, U64_MAX], and umin <= umax. u64 case is simple and 2552 * straightforward. Let's see how s64 range maps onto the same range 2553 * of values, annotated below the line for comparison: 2554 * 2555 * 0 0x7fffffffffffffff 0x8000000000000000 U64_MAX 2556 * |-------------------------------|--------------------------------| 2557 * 0 S64_MAX S64_MIN -1 2558 * 2559 * So s64 values basically start in the middle and they are logically 2560 * contiguous to the right of it, wrapping around from -1 to 0, and 2561 * then finishing as S64_MAX (0x7fffffffffffffff) right before 2562 * S64_MIN. 
We can try drawing the continuity of u64 vs s64 values 2563 * more visually as mapped to sign-agnostic range of hex values. 2564 * 2565 * u64 start u64 end 2566 * _______________________________________________________________ 2567 * / \ 2568 * 0 0x7fffffffffffffff 0x8000000000000000 U64_MAX 2569 * |-------------------------------|--------------------------------| 2570 * 0 S64_MAX S64_MIN -1 2571 * / \ 2572 * >------------------------------ -------------------------------> 2573 * s64 continues... s64 end s64 start s64 "midpoint" 2574 * 2575 * What this means is that, in general, we can't always derive 2576 * something new about u64 from any random s64 range, and vice versa. 2577 * 2578 * But we can do that in two particular cases. One is when entire 2579 * u64/s64 range is *entirely* contained within left half of the above 2580 * diagram or when it is *entirely* contained in the right half. I.e.: 2581 * 2582 * |-------------------------------|--------------------------------| 2583 * ^ ^ ^ ^ 2584 * A B C D 2585 * 2586 * [A, B] and [C, D] are contained entirely in their respective halves 2587 * and form valid contiguous ranges as both u64 and s64 values. [A, B] 2588 * will be non-negative both as u64 and s64 (and in fact it will be 2589 * identical ranges no matter the signedness). [C, D] treated as s64 2590 * will be a range of negative values, while in u64 it will be 2591 * non-negative range of values larger than 0x8000000000000000. 2592 * 2593 * Now, any other range here can't be represented in both u64 and s64 2594 * simultaneously. E.g., [A, C], [A, D], [B, C], [B, D] are valid 2595 * contiguous u64 ranges, but they are discontinuous in s64. [B, C] 2596 * in s64 would be properly presented as [S64_MIN, C] and [B, S64_MAX], 2597 * for example. Similarly, valid s64 range [D, A] (going from negative 2598 * to positive values), would be two separate [D, U64_MAX] and [0, A] 2599 * ranges as u64. 
Currently reg_state can't represent two segments per 2600 * numeric domain, so in such situations we can only derive maximal 2601 * possible range ([0, U64_MAX] for u64, and [S64_MIN, S64_MAX] for s64). 2602 * 2603 * So we use these facts to derive umin/umax from smin/smax and vice 2604 * versa only if they stay within the same "half". This is equivalent 2605 * to checking sign bit: lower half will have sign bit as zero, upper 2606 * half have sign bit 1. Below in code we simplify this by just 2607 * casting umin/umax as smin/smax and checking if they form valid 2608 * range, and vice versa. Those are equivalent checks. 2609 */ 2610 if ((s64)reg->umin_value <= (s64)reg->umax_value) { 2611 reg->smin_value = max_t(s64, reg->smin_value, reg->umin_value); 2612 reg->smax_value = min_t(s64, reg->smax_value, reg->umax_value); 2613 } 2614 /* If we cannot cross the sign boundary, then signed and unsigned bounds 2615 * are the same, so combine. This works even in the negative case, e.g. 2616 * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff. 2617 */ 2618 if ((u64)reg->smin_value <= (u64)reg->smax_value) { 2619 reg->umin_value = max_t(u64, reg->smin_value, reg->umin_value); 2620 reg->umax_value = min_t(u64, reg->smax_value, reg->umax_value); 2621 } else { 2622 /* If the s64 range crosses the sign boundary, then it's split 2623 * between the beginning and end of the U64 domain. In that 2624 * case, we can derive new bounds if the u64 range overlaps 2625 * with only one end of the s64 range. 2626 * 2627 * In the following example, the u64 range overlaps only with 2628 * positive portion of the s64 range. 2629 * 2630 * 0 U64_MAX 2631 * | [xxxxxxxxxxxxxx u64 range xxxxxxxxxxxxxx] | 2632 * |----------------------------|----------------------------| 2633 * |xxxxx s64 range xxxxxxxxx] [xxxxxxx| 2634 * 0 S64_MAX S64_MIN -1 2635 * 2636 * We can thus derive the following new s64 and u64 ranges. 
2637 * 2638 * 0 U64_MAX 2639 * | [xxxxxx u64 range xxxxx] | 2640 * |----------------------------|----------------------------| 2641 * | [xxxxxx s64 range xxxxx] | 2642 * 0 S64_MAX S64_MIN -1 2643 * 2644 * If they overlap in two places, we can't derive anything 2645 * because reg_state can't represent two ranges per numeric 2646 * domain. 2647 * 2648 * 0 U64_MAX 2649 * | [xxxxxxxxxxxxxxxxx u64 range xxxxxxxxxxxxxxxxx] | 2650 * |----------------------------|----------------------------| 2651 * |xxxxx s64 range xxxxxxxxx] [xxxxxxxxxx| 2652 * 0 S64_MAX S64_MIN -1 2653 * 2654 * The first condition below corresponds to the first diagram 2655 * above. 2656 */ 2657 if (reg->umax_value < (u64)reg->smin_value) { 2658 reg->smin_value = (s64)reg->umin_value; 2659 reg->umax_value = min_t(u64, reg->umax_value, reg->smax_value); 2660 } else if ((u64)reg->smax_value < reg->umin_value) { 2661 /* This second condition considers the case where the u64 range 2662 * overlaps with the negative portion of the s64 range: 2663 * 2664 * 0 U64_MAX 2665 * | [xxxxxxxxxxxxxx u64 range xxxxxxxxxxxxxx] | 2666 * |----------------------------|----------------------------| 2667 * |xxxxxxxxx] [xxxxxxxxxxxx s64 range | 2668 * 0 S64_MAX S64_MIN -1 2669 */ 2670 reg->smax_value = (s64)reg->umax_value; 2671 reg->umin_value = max_t(u64, reg->umin_value, reg->smin_value); 2672 } 2673 } 2674 } 2675 2676 static void __reg_deduce_mixed_bounds(struct bpf_reg_state *reg) 2677 { 2678 /* Try to tighten 64-bit bounds from 32-bit knowledge, using 32-bit 2679 * values on both sides of 64-bit range in hope to have tighter range. 2680 * E.g., if r1 is [0x1'00000000, 0x3'80000000], and we learn from 2681 * 32-bit signed > 0 operation that s32 bounds are now [1; 0x7fffffff]. 
2682 * With this, we can substitute 1 as low 32-bits of _low_ 64-bit bound 2683 * (0x100000000 -> 0x100000001) and 0x7fffffff as low 32-bits of 2684 * _high_ 64-bit bound (0x380000000 -> 0x37fffffff) and arrive at a 2685 * better overall bounds for r1 as [0x1'000000001; 0x3'7fffffff]. 2686 * We just need to make sure that derived bounds we are intersecting 2687 * with are well-formed ranges in respective s64 or u64 domain, just 2688 * like we do with similar kinds of 32-to-64 or 64-to-32 adjustments. 2689 */ 2690 __u64 new_umin, new_umax; 2691 __s64 new_smin, new_smax; 2692 2693 /* u32 -> u64 tightening, it's always well-formed */ 2694 new_umin = (reg->umin_value & ~0xffffffffULL) | reg->u32_min_value; 2695 new_umax = (reg->umax_value & ~0xffffffffULL) | reg->u32_max_value; 2696 reg->umin_value = max_t(u64, reg->umin_value, new_umin); 2697 reg->umax_value = min_t(u64, reg->umax_value, new_umax); 2698 /* u32 -> s64 tightening, u32 range embedded into s64 preserves range validity */ 2699 new_smin = (reg->smin_value & ~0xffffffffULL) | reg->u32_min_value; 2700 new_smax = (reg->smax_value & ~0xffffffffULL) | reg->u32_max_value; 2701 reg->smin_value = max_t(s64, reg->smin_value, new_smin); 2702 reg->smax_value = min_t(s64, reg->smax_value, new_smax); 2703 2704 /* Here we would like to handle a special case after sign extending load, 2705 * when upper bits for a 64-bit range are all 1s or all 0s. 
2706 * 2707 * Upper bits are all 1s when register is in a range: 2708 * [0xffff_ffff_0000_0000, 0xffff_ffff_ffff_ffff] 2709 * Upper bits are all 0s when register is in a range: 2710 * [0x0000_0000_0000_0000, 0x0000_0000_ffff_ffff] 2711 * Together this forms are continuous range: 2712 * [0xffff_ffff_0000_0000, 0x0000_0000_ffff_ffff] 2713 * 2714 * Now, suppose that register range is in fact tighter: 2715 * [0xffff_ffff_8000_0000, 0x0000_0000_ffff_ffff] (R) 2716 * Also suppose that it's 32-bit range is positive, 2717 * meaning that lower 32-bits of the full 64-bit register 2718 * are in the range: 2719 * [0x0000_0000, 0x7fff_ffff] (W) 2720 * 2721 * If this happens, then any value in a range: 2722 * [0xffff_ffff_0000_0000, 0xffff_ffff_7fff_ffff] 2723 * is smaller than a lowest bound of the range (R): 2724 * 0xffff_ffff_8000_0000 2725 * which means that upper bits of the full 64-bit register 2726 * can't be all 1s, when lower bits are in range (W). 2727 * 2728 * Note that: 2729 * - 0xffff_ffff_8000_0000 == (s64)S32_MIN 2730 * - 0x0000_0000_7fff_ffff == (s64)S32_MAX 2731 * These relations are used in the conditions below. 
2732 */ 2733 if (reg->s32_min_value >= 0 && reg->smin_value >= S32_MIN && reg->smax_value <= S32_MAX) { 2734 reg->smin_value = reg->s32_min_value; 2735 reg->smax_value = reg->s32_max_value; 2736 reg->umin_value = reg->s32_min_value; 2737 reg->umax_value = reg->s32_max_value; 2738 reg->var_off = tnum_intersect(reg->var_off, 2739 tnum_range(reg->smin_value, reg->smax_value)); 2740 } 2741 } 2742 2743 static void __reg_deduce_bounds(struct bpf_reg_state *reg) 2744 { 2745 __reg32_deduce_bounds(reg); 2746 __reg64_deduce_bounds(reg); 2747 __reg_deduce_mixed_bounds(reg); 2748 } 2749 2750 /* Attempts to improve var_off based on unsigned min/max information */ 2751 static void __reg_bound_offset(struct bpf_reg_state *reg) 2752 { 2753 struct tnum var64_off = tnum_intersect(reg->var_off, 2754 tnum_range(reg->umin_value, 2755 reg->umax_value)); 2756 struct tnum var32_off = tnum_intersect(tnum_subreg(var64_off), 2757 tnum_range(reg->u32_min_value, 2758 reg->u32_max_value)); 2759 2760 reg->var_off = tnum_or(tnum_clear_subreg(var64_off), var32_off); 2761 } 2762 2763 static void reg_bounds_sync(struct bpf_reg_state *reg) 2764 { 2765 /* We might have learned new bounds from the var_off. */ 2766 __update_reg_bounds(reg); 2767 /* We might have learned something about the sign bit. */ 2768 __reg_deduce_bounds(reg); 2769 __reg_deduce_bounds(reg); 2770 __reg_deduce_bounds(reg); 2771 /* We might have learned some bits from the bounds. */ 2772 __reg_bound_offset(reg); 2773 /* Intersecting with the old var_off might have improved our bounds 2774 * slightly, e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc), 2775 * then new var_off is (0; 0x7f...fc) which improves our umax. 
2776 */ 2777 __update_reg_bounds(reg); 2778 } 2779 2780 static int reg_bounds_sanity_check(struct bpf_verifier_env *env, 2781 struct bpf_reg_state *reg, const char *ctx) 2782 { 2783 const char *msg; 2784 2785 if (reg->umin_value > reg->umax_value || 2786 reg->smin_value > reg->smax_value || 2787 reg->u32_min_value > reg->u32_max_value || 2788 reg->s32_min_value > reg->s32_max_value) { 2789 msg = "range bounds violation"; 2790 goto out; 2791 } 2792 2793 if (tnum_is_const(reg->var_off)) { 2794 u64 uval = reg->var_off.value; 2795 s64 sval = (s64)uval; 2796 2797 if (reg->umin_value != uval || reg->umax_value != uval || 2798 reg->smin_value != sval || reg->smax_value != sval) { 2799 msg = "const tnum out of sync with range bounds"; 2800 goto out; 2801 } 2802 } 2803 2804 if (tnum_subreg_is_const(reg->var_off)) { 2805 u32 uval32 = tnum_subreg(reg->var_off).value; 2806 s32 sval32 = (s32)uval32; 2807 2808 if (reg->u32_min_value != uval32 || reg->u32_max_value != uval32 || 2809 reg->s32_min_value != sval32 || reg->s32_max_value != sval32) { 2810 msg = "const subreg tnum out of sync with range bounds"; 2811 goto out; 2812 } 2813 } 2814 2815 return 0; 2816 out: 2817 verifier_bug(env, "REG INVARIANTS VIOLATION (%s): %s u64=[%#llx, %#llx] " 2818 "s64=[%#llx, %#llx] u32=[%#x, %#x] s32=[%#x, %#x] var_off=(%#llx, %#llx)", 2819 ctx, msg, reg->umin_value, reg->umax_value, 2820 reg->smin_value, reg->smax_value, 2821 reg->u32_min_value, reg->u32_max_value, 2822 reg->s32_min_value, reg->s32_max_value, 2823 reg->var_off.value, reg->var_off.mask); 2824 if (env->test_reg_invariants) 2825 return -EFAULT; 2826 __mark_reg_unbounded(reg); 2827 return 0; 2828 } 2829 2830 static bool __reg32_bound_s64(s32 a) 2831 { 2832 return a >= 0 && a <= S32_MAX; 2833 } 2834 2835 static void __reg_assign_32_into_64(struct bpf_reg_state *reg) 2836 { 2837 reg->umin_value = reg->u32_min_value; 2838 reg->umax_value = reg->u32_max_value; 2839 2840 /* Attempt to pull 32-bit signed bounds into 64-bit bounds but 
must 2841 * be positive otherwise set to worse case bounds and refine later 2842 * from tnum. 2843 */ 2844 if (__reg32_bound_s64(reg->s32_min_value) && 2845 __reg32_bound_s64(reg->s32_max_value)) { 2846 reg->smin_value = reg->s32_min_value; 2847 reg->smax_value = reg->s32_max_value; 2848 } else { 2849 reg->smin_value = 0; 2850 reg->smax_value = U32_MAX; 2851 } 2852 } 2853 2854 /* Mark a register as having a completely unknown (scalar) value. */ 2855 static void __mark_reg_unknown_imprecise(struct bpf_reg_state *reg) 2856 { 2857 /* 2858 * Clear type, off, and union(map_ptr, range) and 2859 * padding between 'type' and union 2860 */ 2861 memset(reg, 0, offsetof(struct bpf_reg_state, var_off)); 2862 reg->type = SCALAR_VALUE; 2863 reg->id = 0; 2864 reg->ref_obj_id = 0; 2865 reg->var_off = tnum_unknown; 2866 reg->frameno = 0; 2867 reg->precise = false; 2868 __mark_reg_unbounded(reg); 2869 } 2870 2871 /* Mark a register as having a completely unknown (scalar) value, 2872 * initialize .precise as true when not bpf capable. 
2873 */ 2874 static void __mark_reg_unknown(const struct bpf_verifier_env *env, 2875 struct bpf_reg_state *reg) 2876 { 2877 __mark_reg_unknown_imprecise(reg); 2878 reg->precise = !env->bpf_capable; 2879 } 2880 2881 static void mark_reg_unknown(struct bpf_verifier_env *env, 2882 struct bpf_reg_state *regs, u32 regno) 2883 { 2884 if (WARN_ON(regno >= MAX_BPF_REG)) { 2885 verbose(env, "mark_reg_unknown(regs, %u)\n", regno); 2886 /* Something bad happened, let's kill all regs except FP */ 2887 for (regno = 0; regno < BPF_REG_FP; regno++) 2888 __mark_reg_not_init(env, regs + regno); 2889 return; 2890 } 2891 __mark_reg_unknown(env, regs + regno); 2892 } 2893 2894 static int __mark_reg_s32_range(struct bpf_verifier_env *env, 2895 struct bpf_reg_state *regs, 2896 u32 regno, 2897 s32 s32_min, 2898 s32 s32_max) 2899 { 2900 struct bpf_reg_state *reg = regs + regno; 2901 2902 reg->s32_min_value = max_t(s32, reg->s32_min_value, s32_min); 2903 reg->s32_max_value = min_t(s32, reg->s32_max_value, s32_max); 2904 2905 reg->smin_value = max_t(s64, reg->smin_value, s32_min); 2906 reg->smax_value = min_t(s64, reg->smax_value, s32_max); 2907 2908 reg_bounds_sync(reg); 2909 2910 return reg_bounds_sanity_check(env, reg, "s32_range"); 2911 } 2912 2913 static void __mark_reg_not_init(const struct bpf_verifier_env *env, 2914 struct bpf_reg_state *reg) 2915 { 2916 __mark_reg_unknown(env, reg); 2917 reg->type = NOT_INIT; 2918 } 2919 2920 static void mark_reg_not_init(struct bpf_verifier_env *env, 2921 struct bpf_reg_state *regs, u32 regno) 2922 { 2923 if (WARN_ON(regno >= MAX_BPF_REG)) { 2924 verbose(env, "mark_reg_not_init(regs, %u)\n", regno); 2925 /* Something bad happened, let's kill all regs except FP */ 2926 for (regno = 0; regno < BPF_REG_FP; regno++) 2927 __mark_reg_not_init(env, regs + regno); 2928 return; 2929 } 2930 __mark_reg_not_init(env, regs + regno); 2931 } 2932 2933 static int mark_btf_ld_reg(struct bpf_verifier_env *env, 2934 struct bpf_reg_state *regs, u32 regno, 2935 enum 
bpf_reg_type reg_type, 2936 struct btf *btf, u32 btf_id, 2937 enum bpf_type_flag flag) 2938 { 2939 switch (reg_type) { 2940 case SCALAR_VALUE: 2941 mark_reg_unknown(env, regs, regno); 2942 return 0; 2943 case PTR_TO_BTF_ID: 2944 mark_reg_known_zero(env, regs, regno); 2945 regs[regno].type = PTR_TO_BTF_ID | flag; 2946 regs[regno].btf = btf; 2947 regs[regno].btf_id = btf_id; 2948 if (type_may_be_null(flag)) 2949 regs[regno].id = ++env->id_gen; 2950 return 0; 2951 case PTR_TO_MEM: 2952 mark_reg_known_zero(env, regs, regno); 2953 regs[regno].type = PTR_TO_MEM | flag; 2954 regs[regno].mem_size = 0; 2955 return 0; 2956 default: 2957 verifier_bug(env, "unexpected reg_type %d in %s\n", reg_type, __func__); 2958 return -EFAULT; 2959 } 2960 } 2961 2962 #define DEF_NOT_SUBREG (0) 2963 static void init_reg_state(struct bpf_verifier_env *env, 2964 struct bpf_func_state *state) 2965 { 2966 struct bpf_reg_state *regs = state->regs; 2967 int i; 2968 2969 for (i = 0; i < MAX_BPF_REG; i++) { 2970 mark_reg_not_init(env, regs, i); 2971 regs[i].subreg_def = DEF_NOT_SUBREG; 2972 } 2973 2974 /* frame pointer */ 2975 regs[BPF_REG_FP].type = PTR_TO_STACK; 2976 mark_reg_known_zero(env, regs, BPF_REG_FP); 2977 regs[BPF_REG_FP].frameno = state->frameno; 2978 } 2979 2980 static struct bpf_retval_range retval_range(s32 minval, s32 maxval) 2981 { 2982 return (struct bpf_retval_range){ minval, maxval }; 2983 } 2984 2985 #define BPF_MAIN_FUNC (-1) 2986 static void init_func_state(struct bpf_verifier_env *env, 2987 struct bpf_func_state *state, 2988 int callsite, int frameno, int subprogno) 2989 { 2990 state->callsite = callsite; 2991 state->frameno = frameno; 2992 state->subprogno = subprogno; 2993 state->callback_ret_range = retval_range(0, 0); 2994 init_reg_state(env, state); 2995 mark_verifier_state_scratched(env); 2996 } 2997 2998 /* Similar to push_stack(), but for async callbacks */ 2999 static struct bpf_verifier_state *push_async_cb(struct bpf_verifier_env *env, 3000 int insn_idx, int 
prev_insn_idx, 3001 int subprog, bool is_sleepable) 3002 { 3003 struct bpf_verifier_stack_elem *elem; 3004 struct bpf_func_state *frame; 3005 3006 elem = kzalloc_obj(struct bpf_verifier_stack_elem, GFP_KERNEL_ACCOUNT); 3007 if (!elem) 3008 return ERR_PTR(-ENOMEM); 3009 3010 elem->insn_idx = insn_idx; 3011 elem->prev_insn_idx = prev_insn_idx; 3012 elem->next = env->head; 3013 elem->log_pos = env->log.end_pos; 3014 env->head = elem; 3015 env->stack_size++; 3016 if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) { 3017 verbose(env, 3018 "The sequence of %d jumps is too complex for async cb.\n", 3019 env->stack_size); 3020 return ERR_PTR(-E2BIG); 3021 } 3022 /* Unlike push_stack() do not copy_verifier_state(). 3023 * The caller state doesn't matter. 3024 * This is async callback. It starts in a fresh stack. 3025 * Initialize it similar to do_check_common(). 3026 */ 3027 elem->st.branches = 1; 3028 elem->st.in_sleepable = is_sleepable; 3029 frame = kzalloc_obj(*frame, GFP_KERNEL_ACCOUNT); 3030 if (!frame) 3031 return ERR_PTR(-ENOMEM); 3032 init_func_state(env, frame, 3033 BPF_MAIN_FUNC /* callsite */, 3034 0 /* frameno within this callchain */, 3035 subprog /* subprog number within this prog */); 3036 elem->st.frame[0] = frame; 3037 return &elem->st; 3038 } 3039 3040 3041 enum reg_arg_type { 3042 SRC_OP, /* register is used as source operand */ 3043 DST_OP, /* register is used as destination operand */ 3044 DST_OP_NO_MARK /* same as above, check only, don't mark */ 3045 }; 3046 3047 static int cmp_subprogs(const void *a, const void *b) 3048 { 3049 return ((struct bpf_subprog_info *)a)->start - 3050 ((struct bpf_subprog_info *)b)->start; 3051 } 3052 3053 /* Find subprogram that contains instruction at 'off' */ 3054 struct bpf_subprog_info *bpf_find_containing_subprog(struct bpf_verifier_env *env, int off) 3055 { 3056 struct bpf_subprog_info *vals = env->subprog_info; 3057 int l, r, m; 3058 3059 if (off >= env->prog->len || off < 0 || env->subprog_cnt == 0) 3060 return 
NULL; 3061 3062 l = 0; 3063 r = env->subprog_cnt - 1; 3064 while (l < r) { 3065 m = l + (r - l + 1) / 2; 3066 if (vals[m].start <= off) 3067 l = m; 3068 else 3069 r = m - 1; 3070 } 3071 return &vals[l]; 3072 } 3073 3074 /* Find subprogram that starts exactly at 'off' */ 3075 static int find_subprog(struct bpf_verifier_env *env, int off) 3076 { 3077 struct bpf_subprog_info *p; 3078 3079 p = bpf_find_containing_subprog(env, off); 3080 if (!p || p->start != off) 3081 return -ENOENT; 3082 return p - env->subprog_info; 3083 } 3084 3085 static int add_subprog(struct bpf_verifier_env *env, int off) 3086 { 3087 int insn_cnt = env->prog->len; 3088 int ret; 3089 3090 if (off >= insn_cnt || off < 0) { 3091 verbose(env, "call to invalid destination\n"); 3092 return -EINVAL; 3093 } 3094 ret = find_subprog(env, off); 3095 if (ret >= 0) 3096 return ret; 3097 if (env->subprog_cnt >= BPF_MAX_SUBPROGS) { 3098 verbose(env, "too many subprograms\n"); 3099 return -E2BIG; 3100 } 3101 /* determine subprog starts. 
The end is one before the next starts */ 3102 env->subprog_info[env->subprog_cnt++].start = off; 3103 sort(env->subprog_info, env->subprog_cnt, 3104 sizeof(env->subprog_info[0]), cmp_subprogs, NULL); 3105 return env->subprog_cnt - 1; 3106 } 3107 3108 static int bpf_find_exception_callback_insn_off(struct bpf_verifier_env *env) 3109 { 3110 struct bpf_prog_aux *aux = env->prog->aux; 3111 struct btf *btf = aux->btf; 3112 const struct btf_type *t; 3113 u32 main_btf_id, id; 3114 const char *name; 3115 int ret, i; 3116 3117 /* Non-zero func_info_cnt implies valid btf */ 3118 if (!aux->func_info_cnt) 3119 return 0; 3120 main_btf_id = aux->func_info[0].type_id; 3121 3122 t = btf_type_by_id(btf, main_btf_id); 3123 if (!t) { 3124 verbose(env, "invalid btf id for main subprog in func_info\n"); 3125 return -EINVAL; 3126 } 3127 3128 name = btf_find_decl_tag_value(btf, t, -1, "exception_callback:"); 3129 if (IS_ERR(name)) { 3130 ret = PTR_ERR(name); 3131 /* If there is no tag present, there is no exception callback */ 3132 if (ret == -ENOENT) 3133 ret = 0; 3134 else if (ret == -EEXIST) 3135 verbose(env, "multiple exception callback tags for main subprog\n"); 3136 return ret; 3137 } 3138 3139 ret = btf_find_by_name_kind(btf, name, BTF_KIND_FUNC); 3140 if (ret < 0) { 3141 verbose(env, "exception callback '%s' could not be found in BTF\n", name); 3142 return ret; 3143 } 3144 id = ret; 3145 t = btf_type_by_id(btf, id); 3146 if (btf_func_linkage(t) != BTF_FUNC_GLOBAL) { 3147 verbose(env, "exception callback '%s' must have global linkage\n", name); 3148 return -EINVAL; 3149 } 3150 ret = 0; 3151 for (i = 0; i < aux->func_info_cnt; i++) { 3152 if (aux->func_info[i].type_id != id) 3153 continue; 3154 ret = aux->func_info[i].insn_off; 3155 /* Further func_info and subprog checks will also happen 3156 * later, so assume this is the right insn_off for now. 
3157 */ 3158 if (!ret) { 3159 verbose(env, "invalid exception callback insn_off in func_info: 0\n"); 3160 ret = -EINVAL; 3161 } 3162 } 3163 if (!ret) { 3164 verbose(env, "exception callback type id not found in func_info\n"); 3165 ret = -EINVAL; 3166 } 3167 return ret; 3168 } 3169 3170 #define MAX_KFUNC_DESCS 256 3171 #define MAX_KFUNC_BTFS 256 3172 3173 struct bpf_kfunc_desc { 3174 struct btf_func_model func_model; 3175 u32 func_id; 3176 s32 imm; 3177 u16 offset; 3178 unsigned long addr; 3179 }; 3180 3181 struct bpf_kfunc_btf { 3182 struct btf *btf; 3183 struct module *module; 3184 u16 offset; 3185 }; 3186 3187 struct bpf_kfunc_desc_tab { 3188 /* Sorted by func_id (BTF ID) and offset (fd_array offset) during 3189 * verification. JITs do lookups by bpf_insn, where func_id may not be 3190 * available, therefore at the end of verification do_misc_fixups() 3191 * sorts this by imm and offset. 3192 */ 3193 struct bpf_kfunc_desc descs[MAX_KFUNC_DESCS]; 3194 u32 nr_descs; 3195 }; 3196 3197 struct bpf_kfunc_btf_tab { 3198 struct bpf_kfunc_btf descs[MAX_KFUNC_BTFS]; 3199 u32 nr_descs; 3200 }; 3201 3202 static int specialize_kfunc(struct bpf_verifier_env *env, struct bpf_kfunc_desc *desc, 3203 int insn_idx); 3204 3205 static int kfunc_desc_cmp_by_id_off(const void *a, const void *b) 3206 { 3207 const struct bpf_kfunc_desc *d0 = a; 3208 const struct bpf_kfunc_desc *d1 = b; 3209 3210 /* func_id is not greater than BTF_MAX_TYPE */ 3211 return d0->func_id - d1->func_id ?: d0->offset - d1->offset; 3212 } 3213 3214 static int kfunc_btf_cmp_by_off(const void *a, const void *b) 3215 { 3216 const struct bpf_kfunc_btf *d0 = a; 3217 const struct bpf_kfunc_btf *d1 = b; 3218 3219 return d0->offset - d1->offset; 3220 } 3221 3222 static struct bpf_kfunc_desc * 3223 find_kfunc_desc(const struct bpf_prog *prog, u32 func_id, u16 offset) 3224 { 3225 struct bpf_kfunc_desc desc = { 3226 .func_id = func_id, 3227 .offset = offset, 3228 }; 3229 struct bpf_kfunc_desc_tab *tab; 3230 3231 tab = 
prog->aux->kfunc_tab; 3232 return bsearch(&desc, tab->descs, tab->nr_descs, 3233 sizeof(tab->descs[0]), kfunc_desc_cmp_by_id_off); 3234 } 3235 3236 int bpf_get_kfunc_addr(const struct bpf_prog *prog, u32 func_id, 3237 u16 btf_fd_idx, u8 **func_addr) 3238 { 3239 const struct bpf_kfunc_desc *desc; 3240 3241 desc = find_kfunc_desc(prog, func_id, btf_fd_idx); 3242 if (!desc) 3243 return -EFAULT; 3244 3245 *func_addr = (u8 *)desc->addr; 3246 return 0; 3247 } 3248 3249 static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env *env, 3250 s16 offset) 3251 { 3252 struct bpf_kfunc_btf kf_btf = { .offset = offset }; 3253 struct bpf_kfunc_btf_tab *tab; 3254 struct bpf_kfunc_btf *b; 3255 struct module *mod; 3256 struct btf *btf; 3257 int btf_fd; 3258 3259 tab = env->prog->aux->kfunc_btf_tab; 3260 b = bsearch(&kf_btf, tab->descs, tab->nr_descs, 3261 sizeof(tab->descs[0]), kfunc_btf_cmp_by_off); 3262 if (!b) { 3263 if (tab->nr_descs == MAX_KFUNC_BTFS) { 3264 verbose(env, "too many different module BTFs\n"); 3265 return ERR_PTR(-E2BIG); 3266 } 3267 3268 if (bpfptr_is_null(env->fd_array)) { 3269 verbose(env, "kfunc offset > 0 without fd_array is invalid\n"); 3270 return ERR_PTR(-EPROTO); 3271 } 3272 3273 if (copy_from_bpfptr_offset(&btf_fd, env->fd_array, 3274 offset * sizeof(btf_fd), 3275 sizeof(btf_fd))) 3276 return ERR_PTR(-EFAULT); 3277 3278 btf = btf_get_by_fd(btf_fd); 3279 if (IS_ERR(btf)) { 3280 verbose(env, "invalid module BTF fd specified\n"); 3281 return btf; 3282 } 3283 3284 if (!btf_is_module(btf)) { 3285 verbose(env, "BTF fd for kfunc is not a module BTF\n"); 3286 btf_put(btf); 3287 return ERR_PTR(-EINVAL); 3288 } 3289 3290 mod = btf_try_get_module(btf); 3291 if (!mod) { 3292 btf_put(btf); 3293 return ERR_PTR(-ENXIO); 3294 } 3295 3296 b = &tab->descs[tab->nr_descs++]; 3297 b->btf = btf; 3298 b->module = mod; 3299 b->offset = offset; 3300 3301 /* sort() reorders entries by value, so b may no longer point 3302 * to the right entry after this 3303 */ 3304 
sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]), 3305 kfunc_btf_cmp_by_off, NULL); 3306 } else { 3307 btf = b->btf; 3308 } 3309 3310 return btf; 3311 } 3312 3313 void bpf_free_kfunc_btf_tab(struct bpf_kfunc_btf_tab *tab) 3314 { 3315 if (!tab) 3316 return; 3317 3318 while (tab->nr_descs--) { 3319 module_put(tab->descs[tab->nr_descs].module); 3320 btf_put(tab->descs[tab->nr_descs].btf); 3321 } 3322 kfree(tab); 3323 } 3324 3325 static struct btf *find_kfunc_desc_btf(struct bpf_verifier_env *env, s16 offset) 3326 { 3327 if (offset) { 3328 if (offset < 0) { 3329 /* In the future, this can be allowed to increase limit 3330 * of fd index into fd_array, interpreted as u16. 3331 */ 3332 verbose(env, "negative offset disallowed for kernel module function call\n"); 3333 return ERR_PTR(-EINVAL); 3334 } 3335 3336 return __find_kfunc_desc_btf(env, offset); 3337 } 3338 return btf_vmlinux ?: ERR_PTR(-ENOENT); 3339 } 3340 3341 #define KF_IMPL_SUFFIX "_impl" 3342 3343 static const struct btf_type *find_kfunc_impl_proto(struct bpf_verifier_env *env, 3344 struct btf *btf, 3345 const char *func_name) 3346 { 3347 char *buf = env->tmp_str_buf; 3348 const struct btf_type *func; 3349 s32 impl_id; 3350 int len; 3351 3352 len = snprintf(buf, TMP_STR_BUF_LEN, "%s%s", func_name, KF_IMPL_SUFFIX); 3353 if (len < 0 || len >= TMP_STR_BUF_LEN) { 3354 verbose(env, "function name %s%s is too long\n", func_name, KF_IMPL_SUFFIX); 3355 return NULL; 3356 } 3357 3358 impl_id = btf_find_by_name_kind(btf, buf, BTF_KIND_FUNC); 3359 if (impl_id <= 0) { 3360 verbose(env, "cannot find function %s in BTF\n", buf); 3361 return NULL; 3362 } 3363 3364 func = btf_type_by_id(btf, impl_id); 3365 3366 return btf_type_by_id(btf, func->type); 3367 } 3368 3369 static int fetch_kfunc_meta(struct bpf_verifier_env *env, 3370 s32 func_id, 3371 s16 offset, 3372 struct bpf_kfunc_meta *kfunc) 3373 { 3374 const struct btf_type *func, *func_proto; 3375 const char *func_name; 3376 u32 *kfunc_flags; 3377 struct btf *btf; 3378 
3379 if (func_id <= 0) { 3380 verbose(env, "invalid kernel function btf_id %d\n", func_id); 3381 return -EINVAL; 3382 } 3383 3384 btf = find_kfunc_desc_btf(env, offset); 3385 if (IS_ERR(btf)) { 3386 verbose(env, "failed to find BTF for kernel function\n"); 3387 return PTR_ERR(btf); 3388 } 3389 3390 /* 3391 * Note that kfunc_flags may be NULL at this point, which 3392 * means that we couldn't find func_id in any relevant 3393 * kfunc_id_set. This most likely indicates an invalid kfunc 3394 * call. However we don't fail with an error here, 3395 * and let the caller decide what to do with NULL kfunc->flags. 3396 */ 3397 kfunc_flags = btf_kfunc_flags(btf, func_id, env->prog); 3398 3399 func = btf_type_by_id(btf, func_id); 3400 if (!func || !btf_type_is_func(func)) { 3401 verbose(env, "kernel btf_id %d is not a function\n", func_id); 3402 return -EINVAL; 3403 } 3404 3405 func_name = btf_name_by_offset(btf, func->name_off); 3406 3407 /* 3408 * An actual prototype of a kfunc with KF_IMPLICIT_ARGS flag 3409 * can be found through the counterpart _impl kfunc. 
3410 */ 3411 if (kfunc_flags && (*kfunc_flags & KF_IMPLICIT_ARGS)) 3412 func_proto = find_kfunc_impl_proto(env, btf, func_name); 3413 else 3414 func_proto = btf_type_by_id(btf, func->type); 3415 3416 if (!func_proto || !btf_type_is_func_proto(func_proto)) { 3417 verbose(env, "kernel function btf_id %d does not have a valid func_proto\n", 3418 func_id); 3419 return -EINVAL; 3420 } 3421 3422 memset(kfunc, 0, sizeof(*kfunc)); 3423 kfunc->btf = btf; 3424 kfunc->id = func_id; 3425 kfunc->name = func_name; 3426 kfunc->proto = func_proto; 3427 kfunc->flags = kfunc_flags; 3428 3429 return 0; 3430 } 3431 3432 static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset) 3433 { 3434 struct bpf_kfunc_btf_tab *btf_tab; 3435 struct btf_func_model func_model; 3436 struct bpf_kfunc_desc_tab *tab; 3437 struct bpf_prog_aux *prog_aux; 3438 struct bpf_kfunc_meta kfunc; 3439 struct bpf_kfunc_desc *desc; 3440 unsigned long addr; 3441 int err; 3442 3443 prog_aux = env->prog->aux; 3444 tab = prog_aux->kfunc_tab; 3445 btf_tab = prog_aux->kfunc_btf_tab; 3446 if (!tab) { 3447 if (!btf_vmlinux) { 3448 verbose(env, "calling kernel function is not supported without CONFIG_DEBUG_INFO_BTF\n"); 3449 return -ENOTSUPP; 3450 } 3451 3452 if (!env->prog->jit_requested) { 3453 verbose(env, "JIT is required for calling kernel function\n"); 3454 return -ENOTSUPP; 3455 } 3456 3457 if (!bpf_jit_supports_kfunc_call()) { 3458 verbose(env, "JIT does not support calling kernel function\n"); 3459 return -ENOTSUPP; 3460 } 3461 3462 if (!env->prog->gpl_compatible) { 3463 verbose(env, "cannot call kernel function from non-GPL compatible program\n"); 3464 return -EINVAL; 3465 } 3466 3467 tab = kzalloc_obj(*tab, GFP_KERNEL_ACCOUNT); 3468 if (!tab) 3469 return -ENOMEM; 3470 prog_aux->kfunc_tab = tab; 3471 } 3472 3473 /* func_id == 0 is always invalid, but instead of returning an error, be 3474 * conservative and wait until the code elimination pass before returning 3475 * error, so that invalid 
calls that get pruned out can be in BPF programs 3476 * loaded from userspace. It is also required that offset be untouched 3477 * for such calls. 3478 */ 3479 if (!func_id && !offset) 3480 return 0; 3481 3482 if (!btf_tab && offset) { 3483 btf_tab = kzalloc_obj(*btf_tab, GFP_KERNEL_ACCOUNT); 3484 if (!btf_tab) 3485 return -ENOMEM; 3486 prog_aux->kfunc_btf_tab = btf_tab; 3487 } 3488 3489 if (find_kfunc_desc(env->prog, func_id, offset)) 3490 return 0; 3491 3492 if (tab->nr_descs == MAX_KFUNC_DESCS) { 3493 verbose(env, "too many different kernel function calls\n"); 3494 return -E2BIG; 3495 } 3496 3497 err = fetch_kfunc_meta(env, func_id, offset, &kfunc); 3498 if (err) 3499 return err; 3500 3501 addr = kallsyms_lookup_name(kfunc.name); 3502 if (!addr) { 3503 verbose(env, "cannot find address for kernel function %s\n", kfunc.name); 3504 return -EINVAL; 3505 } 3506 3507 if (bpf_dev_bound_kfunc_id(func_id)) { 3508 err = bpf_dev_bound_kfunc_check(&env->log, prog_aux); 3509 if (err) 3510 return err; 3511 } 3512 3513 err = btf_distill_func_proto(&env->log, kfunc.btf, kfunc.proto, kfunc.name, &func_model); 3514 if (err) 3515 return err; 3516 3517 desc = &tab->descs[tab->nr_descs++]; 3518 desc->func_id = func_id; 3519 desc->offset = offset; 3520 desc->addr = addr; 3521 desc->func_model = func_model; 3522 sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]), 3523 kfunc_desc_cmp_by_id_off, NULL); 3524 return 0; 3525 } 3526 3527 static int kfunc_desc_cmp_by_imm_off(const void *a, const void *b) 3528 { 3529 const struct bpf_kfunc_desc *d0 = a; 3530 const struct bpf_kfunc_desc *d1 = b; 3531 3532 if (d0->imm != d1->imm) 3533 return d0->imm < d1->imm ? -1 : 1; 3534 if (d0->offset != d1->offset) 3535 return d0->offset < d1->offset ? 
-1 : 1; 3536 return 0; 3537 } 3538 3539 static int set_kfunc_desc_imm(struct bpf_verifier_env *env, struct bpf_kfunc_desc *desc) 3540 { 3541 unsigned long call_imm; 3542 3543 if (bpf_jit_supports_far_kfunc_call()) { 3544 call_imm = desc->func_id; 3545 } else { 3546 call_imm = BPF_CALL_IMM(desc->addr); 3547 /* Check whether the relative offset overflows desc->imm */ 3548 if ((unsigned long)(s32)call_imm != call_imm) { 3549 verbose(env, "address of kernel func_id %u is out of range\n", 3550 desc->func_id); 3551 return -EINVAL; 3552 } 3553 } 3554 desc->imm = call_imm; 3555 return 0; 3556 } 3557 3558 static int sort_kfunc_descs_by_imm_off(struct bpf_verifier_env *env) 3559 { 3560 struct bpf_kfunc_desc_tab *tab; 3561 int i, err; 3562 3563 tab = env->prog->aux->kfunc_tab; 3564 if (!tab) 3565 return 0; 3566 3567 for (i = 0; i < tab->nr_descs; i++) { 3568 err = set_kfunc_desc_imm(env, &tab->descs[i]); 3569 if (err) 3570 return err; 3571 } 3572 3573 sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]), 3574 kfunc_desc_cmp_by_imm_off, NULL); 3575 return 0; 3576 } 3577 3578 bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog) 3579 { 3580 return !!prog->aux->kfunc_tab; 3581 } 3582 3583 const struct btf_func_model * 3584 bpf_jit_find_kfunc_model(const struct bpf_prog *prog, 3585 const struct bpf_insn *insn) 3586 { 3587 const struct bpf_kfunc_desc desc = { 3588 .imm = insn->imm, 3589 .offset = insn->off, 3590 }; 3591 const struct bpf_kfunc_desc *res; 3592 struct bpf_kfunc_desc_tab *tab; 3593 3594 tab = prog->aux->kfunc_tab; 3595 res = bsearch(&desc, tab->descs, tab->nr_descs, 3596 sizeof(tab->descs[0]), kfunc_desc_cmp_by_imm_off); 3597 3598 return res ? 
&res->func_model : NULL; 3599 } 3600 3601 static int add_kfunc_in_insns(struct bpf_verifier_env *env, 3602 struct bpf_insn *insn, int cnt) 3603 { 3604 int i, ret; 3605 3606 for (i = 0; i < cnt; i++, insn++) { 3607 if (bpf_pseudo_kfunc_call(insn)) { 3608 ret = add_kfunc_call(env, insn->imm, insn->off); 3609 if (ret < 0) 3610 return ret; 3611 } 3612 } 3613 return 0; 3614 } 3615 3616 static int add_subprog_and_kfunc(struct bpf_verifier_env *env) 3617 { 3618 struct bpf_subprog_info *subprog = env->subprog_info; 3619 int i, ret, insn_cnt = env->prog->len, ex_cb_insn; 3620 struct bpf_insn *insn = env->prog->insnsi; 3621 3622 /* Add entry function. */ 3623 ret = add_subprog(env, 0); 3624 if (ret) 3625 return ret; 3626 3627 for (i = 0; i < insn_cnt; i++, insn++) { 3628 if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn) && 3629 !bpf_pseudo_kfunc_call(insn)) 3630 continue; 3631 3632 if (!env->bpf_capable) { 3633 verbose(env, "loading/calling other bpf or kernel functions are allowed for CAP_BPF and CAP_SYS_ADMIN\n"); 3634 return -EPERM; 3635 } 3636 3637 if (bpf_pseudo_func(insn) || bpf_pseudo_call(insn)) 3638 ret = add_subprog(env, i + insn->imm + 1); 3639 else 3640 ret = add_kfunc_call(env, insn->imm, insn->off); 3641 3642 if (ret < 0) 3643 return ret; 3644 } 3645 3646 ret = bpf_find_exception_callback_insn_off(env); 3647 if (ret < 0) 3648 return ret; 3649 ex_cb_insn = ret; 3650 3651 /* If ex_cb_insn > 0, this means that the main program has a subprog 3652 * marked using BTF decl tag to serve as the exception callback. 3653 */ 3654 if (ex_cb_insn) { 3655 ret = add_subprog(env, ex_cb_insn); 3656 if (ret < 0) 3657 return ret; 3658 for (i = 1; i < env->subprog_cnt; i++) { 3659 if (env->subprog_info[i].start != ex_cb_insn) 3660 continue; 3661 env->exception_callback_subprog = i; 3662 mark_subprog_exc_cb(env, i); 3663 break; 3664 } 3665 } 3666 3667 /* Add a fake 'exit' subprog which could simplify subprog iteration 3668 * logic. 'subprog_cnt' should not be increased. 
3669 */ 3670 subprog[env->subprog_cnt].start = insn_cnt; 3671 3672 if (env->log.level & BPF_LOG_LEVEL2) 3673 for (i = 0; i < env->subprog_cnt; i++) 3674 verbose(env, "func#%d @%d\n", i, subprog[i].start); 3675 3676 return 0; 3677 } 3678 3679 static int check_subprogs(struct bpf_verifier_env *env) 3680 { 3681 int i, subprog_start, subprog_end, off, cur_subprog = 0; 3682 struct bpf_subprog_info *subprog = env->subprog_info; 3683 struct bpf_insn *insn = env->prog->insnsi; 3684 int insn_cnt = env->prog->len; 3685 3686 /* now check that all jumps are within the same subprog */ 3687 subprog_start = subprog[cur_subprog].start; 3688 subprog_end = subprog[cur_subprog + 1].start; 3689 for (i = 0; i < insn_cnt; i++) { 3690 u8 code = insn[i].code; 3691 3692 if (code == (BPF_JMP | BPF_CALL) && 3693 insn[i].src_reg == 0 && 3694 insn[i].imm == BPF_FUNC_tail_call) { 3695 subprog[cur_subprog].has_tail_call = true; 3696 subprog[cur_subprog].tail_call_reachable = true; 3697 } 3698 if (BPF_CLASS(code) == BPF_LD && 3699 (BPF_MODE(code) == BPF_ABS || BPF_MODE(code) == BPF_IND)) 3700 subprog[cur_subprog].has_ld_abs = true; 3701 if (BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32) 3702 goto next; 3703 if (BPF_OP(code) == BPF_CALL) 3704 goto next; 3705 if (BPF_OP(code) == BPF_EXIT) { 3706 subprog[cur_subprog].exit_idx = i; 3707 goto next; 3708 } 3709 off = i + bpf_jmp_offset(&insn[i]) + 1; 3710 if (off < subprog_start || off >= subprog_end) { 3711 verbose(env, "jump out of range from insn %d to %d\n", i, off); 3712 return -EINVAL; 3713 } 3714 next: 3715 if (i == subprog_end - 1) { 3716 /* to avoid fall-through from one subprog into another 3717 * the last insn of the subprog should be either exit 3718 * or unconditional jump back or bpf_throw call 3719 */ 3720 if (code != (BPF_JMP | BPF_EXIT) && 3721 code != (BPF_JMP32 | BPF_JA) && 3722 code != (BPF_JMP | BPF_JA)) { 3723 verbose(env, "last insn is not an exit or jmp\n"); 3724 return -EINVAL; 3725 } 3726 subprog_start = 
subprog_end; 3727 cur_subprog++; 3728 if (cur_subprog < env->subprog_cnt) 3729 subprog_end = subprog[cur_subprog + 1].start; 3730 } 3731 } 3732 return 0; 3733 } 3734 3735 static int mark_stack_slot_obj_read(struct bpf_verifier_env *env, struct bpf_reg_state *reg, 3736 int spi, int nr_slots) 3737 { 3738 int err, i; 3739 3740 for (i = 0; i < nr_slots; i++) { 3741 err = bpf_mark_stack_read(env, reg->frameno, env->insn_idx, BIT(spi - i)); 3742 if (err) 3743 return err; 3744 mark_stack_slot_scratched(env, spi - i); 3745 } 3746 return 0; 3747 } 3748 3749 static int mark_dynptr_read(struct bpf_verifier_env *env, struct bpf_reg_state *reg) 3750 { 3751 int spi; 3752 3753 /* For CONST_PTR_TO_DYNPTR, it must have already been done by 3754 * check_reg_arg in check_helper_call and mark_btf_func_reg_size in 3755 * check_kfunc_call. 3756 */ 3757 if (reg->type == CONST_PTR_TO_DYNPTR) 3758 return 0; 3759 spi = dynptr_get_spi(env, reg); 3760 if (spi < 0) 3761 return spi; 3762 /* Caller ensures dynptr is valid and initialized, which means spi is in 3763 * bounds and spi is the first dynptr slot. Simply mark stack slot as 3764 * read. 3765 */ 3766 return mark_stack_slot_obj_read(env, reg, spi, BPF_DYNPTR_NR_SLOTS); 3767 } 3768 3769 static int mark_iter_read(struct bpf_verifier_env *env, struct bpf_reg_state *reg, 3770 int spi, int nr_slots) 3771 { 3772 return mark_stack_slot_obj_read(env, reg, spi, nr_slots); 3773 } 3774 3775 static int mark_irq_flag_read(struct bpf_verifier_env *env, struct bpf_reg_state *reg) 3776 { 3777 int spi; 3778 3779 spi = irq_flag_get_spi(env, reg); 3780 if (spi < 0) 3781 return spi; 3782 return mark_stack_slot_obj_read(env, reg, spi, 1); 3783 } 3784 3785 /* This function is supposed to be used by the following 32-bit optimization 3786 * code only. It returns TRUE if the source or destination register operates 3787 * on 64-bit, otherwise return FALSE. 
3788 */ 3789 static bool is_reg64(struct bpf_insn *insn, 3790 u32 regno, struct bpf_reg_state *reg, enum reg_arg_type t) 3791 { 3792 u8 code, class, op; 3793 3794 code = insn->code; 3795 class = BPF_CLASS(code); 3796 op = BPF_OP(code); 3797 if (class == BPF_JMP) { 3798 /* BPF_EXIT for "main" will reach here. Return TRUE 3799 * conservatively. 3800 */ 3801 if (op == BPF_EXIT) 3802 return true; 3803 if (op == BPF_CALL) { 3804 /* BPF to BPF call will reach here because of marking 3805 * caller saved clobber with DST_OP_NO_MARK for which we 3806 * don't care the register def because they are anyway 3807 * marked as NOT_INIT already. 3808 */ 3809 if (insn->src_reg == BPF_PSEUDO_CALL) 3810 return false; 3811 /* Helper call will reach here because of arg type 3812 * check, conservatively return TRUE. 3813 */ 3814 if (t == SRC_OP) 3815 return true; 3816 3817 return false; 3818 } 3819 } 3820 3821 if (class == BPF_ALU64 && op == BPF_END && (insn->imm == 16 || insn->imm == 32)) 3822 return false; 3823 3824 if (class == BPF_ALU64 || class == BPF_JMP || 3825 (class == BPF_ALU && op == BPF_END && insn->imm == 64)) 3826 return true; 3827 3828 if (class == BPF_ALU || class == BPF_JMP32) 3829 return false; 3830 3831 if (class == BPF_LDX) { 3832 if (t != SRC_OP) 3833 return BPF_SIZE(code) == BPF_DW || BPF_MODE(code) == BPF_MEMSX; 3834 /* LDX source must be ptr. */ 3835 return true; 3836 } 3837 3838 if (class == BPF_STX) { 3839 /* BPF_STX (including atomic variants) has one or more source 3840 * operands, one of which is a ptr. Check whether the caller is 3841 * asking about it. 3842 */ 3843 if (t == SRC_OP && reg->type != SCALAR_VALUE) 3844 return true; 3845 return BPF_SIZE(code) == BPF_DW; 3846 } 3847 3848 if (class == BPF_LD) { 3849 u8 mode = BPF_MODE(code); 3850 3851 /* LD_IMM64 */ 3852 if (mode == BPF_IMM) 3853 return true; 3854 3855 /* Both LD_IND and LD_ABS return 32-bit data. */ 3856 if (t != SRC_OP) 3857 return false; 3858 3859 /* Implicit ctx ptr. 
*/ 3860 if (regno == BPF_REG_6) 3861 return true; 3862 3863 /* Explicit source could be any width. */ 3864 return true; 3865 } 3866 3867 if (class == BPF_ST) 3868 /* The only source register for BPF_ST is a ptr. */ 3869 return true; 3870 3871 /* Conservatively return true at default. */ 3872 return true; 3873 } 3874 3875 /* Return the regno defined by the insn, or -1. */ 3876 static int insn_def_regno(const struct bpf_insn *insn) 3877 { 3878 switch (BPF_CLASS(insn->code)) { 3879 case BPF_JMP: 3880 case BPF_JMP32: 3881 case BPF_ST: 3882 return -1; 3883 case BPF_STX: 3884 if (BPF_MODE(insn->code) == BPF_ATOMIC || 3885 BPF_MODE(insn->code) == BPF_PROBE_ATOMIC) { 3886 if (insn->imm == BPF_CMPXCHG) 3887 return BPF_REG_0; 3888 else if (insn->imm == BPF_LOAD_ACQ) 3889 return insn->dst_reg; 3890 else if (insn->imm & BPF_FETCH) 3891 return insn->src_reg; 3892 } 3893 return -1; 3894 default: 3895 return insn->dst_reg; 3896 } 3897 } 3898 3899 /* Return TRUE if INSN has defined any 32-bit value explicitly. */ 3900 static bool insn_has_def32(struct bpf_insn *insn) 3901 { 3902 int dst_reg = insn_def_regno(insn); 3903 3904 if (dst_reg == -1) 3905 return false; 3906 3907 return !is_reg64(insn, dst_reg, NULL, DST_OP); 3908 } 3909 3910 static void mark_insn_zext(struct bpf_verifier_env *env, 3911 struct bpf_reg_state *reg) 3912 { 3913 s32 def_idx = reg->subreg_def; 3914 3915 if (def_idx == DEF_NOT_SUBREG) 3916 return; 3917 3918 env->insn_aux_data[def_idx - 1].zext_dst = true; 3919 /* The dst will be zero extended, so won't be sub-register anymore. 
*/ 3920 reg->subreg_def = DEF_NOT_SUBREG; 3921 } 3922 3923 static int __check_reg_arg(struct bpf_verifier_env *env, struct bpf_reg_state *regs, u32 regno, 3924 enum reg_arg_type t) 3925 { 3926 struct bpf_insn *insn = env->prog->insnsi + env->insn_idx; 3927 struct bpf_reg_state *reg; 3928 bool rw64; 3929 3930 if (regno >= MAX_BPF_REG) { 3931 verbose(env, "R%d is invalid\n", regno); 3932 return -EINVAL; 3933 } 3934 3935 mark_reg_scratched(env, regno); 3936 3937 reg = ®s[regno]; 3938 rw64 = is_reg64(insn, regno, reg, t); 3939 if (t == SRC_OP) { 3940 /* check whether register used as source operand can be read */ 3941 if (reg->type == NOT_INIT) { 3942 verbose(env, "R%d !read_ok\n", regno); 3943 return -EACCES; 3944 } 3945 /* We don't need to worry about FP liveness because it's read-only */ 3946 if (regno == BPF_REG_FP) 3947 return 0; 3948 3949 if (rw64) 3950 mark_insn_zext(env, reg); 3951 3952 return 0; 3953 } else { 3954 /* check whether register used as dest operand can be written to */ 3955 if (regno == BPF_REG_FP) { 3956 verbose(env, "frame pointer is read only\n"); 3957 return -EACCES; 3958 } 3959 reg->subreg_def = rw64 ? 
DEF_NOT_SUBREG : env->insn_idx + 1; 3960 if (t == DST_OP) 3961 mark_reg_unknown(env, regs, regno); 3962 } 3963 return 0; 3964 } 3965 3966 static int check_reg_arg(struct bpf_verifier_env *env, u32 regno, 3967 enum reg_arg_type t) 3968 { 3969 struct bpf_verifier_state *vstate = env->cur_state; 3970 struct bpf_func_state *state = vstate->frame[vstate->curframe]; 3971 3972 return __check_reg_arg(env, state->regs, regno, t); 3973 } 3974 3975 static int insn_stack_access_flags(int frameno, int spi) 3976 { 3977 return INSN_F_STACK_ACCESS | (spi << INSN_F_SPI_SHIFT) | frameno; 3978 } 3979 3980 static int insn_stack_access_spi(int insn_flags) 3981 { 3982 return (insn_flags >> INSN_F_SPI_SHIFT) & INSN_F_SPI_MASK; 3983 } 3984 3985 static int insn_stack_access_frameno(int insn_flags) 3986 { 3987 return insn_flags & INSN_F_FRAMENO_MASK; 3988 } 3989 3990 static void mark_jmp_point(struct bpf_verifier_env *env, int idx) 3991 { 3992 env->insn_aux_data[idx].jmp_point = true; 3993 } 3994 3995 static bool is_jmp_point(struct bpf_verifier_env *env, int insn_idx) 3996 { 3997 return env->insn_aux_data[insn_idx].jmp_point; 3998 } 3999 4000 #define LR_FRAMENO_BITS 3 4001 #define LR_SPI_BITS 6 4002 #define LR_ENTRY_BITS (LR_SPI_BITS + LR_FRAMENO_BITS + 1) 4003 #define LR_SIZE_BITS 4 4004 #define LR_FRAMENO_MASK ((1ull << LR_FRAMENO_BITS) - 1) 4005 #define LR_SPI_MASK ((1ull << LR_SPI_BITS) - 1) 4006 #define LR_SIZE_MASK ((1ull << LR_SIZE_BITS) - 1) 4007 #define LR_SPI_OFF LR_FRAMENO_BITS 4008 #define LR_IS_REG_OFF (LR_SPI_BITS + LR_FRAMENO_BITS) 4009 #define LINKED_REGS_MAX 6 4010 4011 struct linked_reg { 4012 u8 frameno; 4013 union { 4014 u8 spi; 4015 u8 regno; 4016 }; 4017 bool is_reg; 4018 }; 4019 4020 struct linked_regs { 4021 int cnt; 4022 struct linked_reg entries[LINKED_REGS_MAX]; 4023 }; 4024 4025 static struct linked_reg *linked_regs_push(struct linked_regs *s) 4026 { 4027 if (s->cnt < LINKED_REGS_MAX) 4028 return &s->entries[s->cnt++]; 4029 4030 return NULL; 4031 } 4032 4033 /* 
Use u64 as a vector of 6 10-bit values, use first 4-bits to track 4034 * number of elements currently in stack. 4035 * Pack one history entry for linked registers as 10 bits in the following format: 4036 * - 3-bits frameno 4037 * - 6-bits spi_or_reg 4038 * - 1-bit is_reg 4039 */ 4040 static u64 linked_regs_pack(struct linked_regs *s) 4041 { 4042 u64 val = 0; 4043 int i; 4044 4045 for (i = 0; i < s->cnt; ++i) { 4046 struct linked_reg *e = &s->entries[i]; 4047 u64 tmp = 0; 4048 4049 tmp |= e->frameno; 4050 tmp |= e->spi << LR_SPI_OFF; 4051 tmp |= (e->is_reg ? 1 : 0) << LR_IS_REG_OFF; 4052 4053 val <<= LR_ENTRY_BITS; 4054 val |= tmp; 4055 } 4056 val <<= LR_SIZE_BITS; 4057 val |= s->cnt; 4058 return val; 4059 } 4060 4061 static void linked_regs_unpack(u64 val, struct linked_regs *s) 4062 { 4063 int i; 4064 4065 s->cnt = val & LR_SIZE_MASK; 4066 val >>= LR_SIZE_BITS; 4067 4068 for (i = 0; i < s->cnt; ++i) { 4069 struct linked_reg *e = &s->entries[i]; 4070 4071 e->frameno = val & LR_FRAMENO_MASK; 4072 e->spi = (val >> LR_SPI_OFF) & LR_SPI_MASK; 4073 e->is_reg = (val >> LR_IS_REG_OFF) & 0x1; 4074 val >>= LR_ENTRY_BITS; 4075 } 4076 } 4077 4078 /* for any branch, call, exit record the history of jmps in the given state */ 4079 static int push_jmp_history(struct bpf_verifier_env *env, struct bpf_verifier_state *cur, 4080 int insn_flags, u64 linked_regs) 4081 { 4082 u32 cnt = cur->jmp_history_cnt; 4083 struct bpf_jmp_history_entry *p; 4084 size_t alloc_size; 4085 4086 /* combine instruction flags if we already recorded this instruction */ 4087 if (env->cur_hist_ent) { 4088 /* atomic instructions push insn_flags twice, for READ and 4089 * WRITE sides, but they should agree on stack slot 4090 */ 4091 verifier_bug_if((env->cur_hist_ent->flags & insn_flags) && 4092 (env->cur_hist_ent->flags & insn_flags) != insn_flags, 4093 env, "insn history: insn_idx %d cur flags %x new flags %x", 4094 env->insn_idx, env->cur_hist_ent->flags, insn_flags); 4095 env->cur_hist_ent->flags |= 
insn_flags; 4096 verifier_bug_if(env->cur_hist_ent->linked_regs != 0, env, 4097 "insn history: insn_idx %d linked_regs: %#llx", 4098 env->insn_idx, env->cur_hist_ent->linked_regs); 4099 env->cur_hist_ent->linked_regs = linked_regs; 4100 return 0; 4101 } 4102 4103 cnt++; 4104 alloc_size = kmalloc_size_roundup(size_mul(cnt, sizeof(*p))); 4105 p = krealloc(cur->jmp_history, alloc_size, GFP_KERNEL_ACCOUNT); 4106 if (!p) 4107 return -ENOMEM; 4108 cur->jmp_history = p; 4109 4110 p = &cur->jmp_history[cnt - 1]; 4111 p->idx = env->insn_idx; 4112 p->prev_idx = env->prev_insn_idx; 4113 p->flags = insn_flags; 4114 p->linked_regs = linked_regs; 4115 cur->jmp_history_cnt = cnt; 4116 env->cur_hist_ent = p; 4117 4118 return 0; 4119 } 4120 4121 static struct bpf_jmp_history_entry *get_jmp_hist_entry(struct bpf_verifier_state *st, 4122 u32 hist_end, int insn_idx) 4123 { 4124 if (hist_end > 0 && st->jmp_history[hist_end - 1].idx == insn_idx) 4125 return &st->jmp_history[hist_end - 1]; 4126 return NULL; 4127 } 4128 4129 /* Backtrack one insn at a time. If idx is not at the top of recorded 4130 * history then previous instruction came from straight line execution. 4131 * Return -ENOENT if we exhausted all instructions within given state. 4132 * 4133 * It's legal to have a bit of a looping with the same starting and ending 4134 * insn index within the same state, e.g.: 3->4->5->3, so just because current 4135 * instruction index is the same as state's first_idx doesn't mean we are 4136 * done. If there is still some jump history left, we should keep going. We 4137 * need to take into account that we might have a jump history between given 4138 * state's parent and itself, due to checkpointing. In this case, we'll have 4139 * history entry recording a jump from last instruction of parent state and 4140 * first instruction of given state. 
4141 */ 4142 static int get_prev_insn_idx(struct bpf_verifier_state *st, int i, 4143 u32 *history) 4144 { 4145 u32 cnt = *history; 4146 4147 if (i == st->first_insn_idx) { 4148 if (cnt == 0) 4149 return -ENOENT; 4150 if (cnt == 1 && st->jmp_history[0].idx == i) 4151 return -ENOENT; 4152 } 4153 4154 if (cnt && st->jmp_history[cnt - 1].idx == i) { 4155 i = st->jmp_history[cnt - 1].prev_idx; 4156 (*history)--; 4157 } else { 4158 i--; 4159 } 4160 return i; 4161 } 4162 4163 static const char *disasm_kfunc_name(void *data, const struct bpf_insn *insn) 4164 { 4165 const struct btf_type *func; 4166 struct btf *desc_btf; 4167 4168 if (insn->src_reg != BPF_PSEUDO_KFUNC_CALL) 4169 return NULL; 4170 4171 desc_btf = find_kfunc_desc_btf(data, insn->off); 4172 if (IS_ERR(desc_btf)) 4173 return "<error>"; 4174 4175 func = btf_type_by_id(desc_btf, insn->imm); 4176 return btf_name_by_offset(desc_btf, func->name_off); 4177 } 4178 4179 static void verbose_insn(struct bpf_verifier_env *env, struct bpf_insn *insn) 4180 { 4181 const struct bpf_insn_cbs cbs = { 4182 .cb_call = disasm_kfunc_name, 4183 .cb_print = verbose, 4184 .private_data = env, 4185 }; 4186 4187 print_bpf_insn(&cbs, insn, env->allow_ptr_leaks); 4188 } 4189 4190 static inline void bt_init(struct backtrack_state *bt, u32 frame) 4191 { 4192 bt->frame = frame; 4193 } 4194 4195 static inline void bt_reset(struct backtrack_state *bt) 4196 { 4197 struct bpf_verifier_env *env = bt->env; 4198 4199 memset(bt, 0, sizeof(*bt)); 4200 bt->env = env; 4201 } 4202 4203 static inline u32 bt_empty(struct backtrack_state *bt) 4204 { 4205 u64 mask = 0; 4206 int i; 4207 4208 for (i = 0; i <= bt->frame; i++) 4209 mask |= bt->reg_masks[i] | bt->stack_masks[i]; 4210 4211 return mask == 0; 4212 } 4213 4214 static inline int bt_subprog_enter(struct backtrack_state *bt) 4215 { 4216 if (bt->frame == MAX_CALL_FRAMES - 1) { 4217 verifier_bug(bt->env, "subprog enter from frame %d", bt->frame); 4218 return -EFAULT; 4219 } 4220 bt->frame++; 4221 return 
0; 4222 } 4223 4224 static inline int bt_subprog_exit(struct backtrack_state *bt) 4225 { 4226 if (bt->frame == 0) { 4227 verifier_bug(bt->env, "subprog exit from frame 0"); 4228 return -EFAULT; 4229 } 4230 bt->frame--; 4231 return 0; 4232 } 4233 4234 static inline void bt_set_frame_reg(struct backtrack_state *bt, u32 frame, u32 reg) 4235 { 4236 bt->reg_masks[frame] |= 1 << reg; 4237 } 4238 4239 static inline void bt_clear_frame_reg(struct backtrack_state *bt, u32 frame, u32 reg) 4240 { 4241 bt->reg_masks[frame] &= ~(1 << reg); 4242 } 4243 4244 static inline void bt_set_reg(struct backtrack_state *bt, u32 reg) 4245 { 4246 bt_set_frame_reg(bt, bt->frame, reg); 4247 } 4248 4249 static inline void bt_clear_reg(struct backtrack_state *bt, u32 reg) 4250 { 4251 bt_clear_frame_reg(bt, bt->frame, reg); 4252 } 4253 4254 static inline void bt_set_frame_slot(struct backtrack_state *bt, u32 frame, u32 slot) 4255 { 4256 bt->stack_masks[frame] |= 1ull << slot; 4257 } 4258 4259 static inline void bt_clear_frame_slot(struct backtrack_state *bt, u32 frame, u32 slot) 4260 { 4261 bt->stack_masks[frame] &= ~(1ull << slot); 4262 } 4263 4264 static inline u32 bt_frame_reg_mask(struct backtrack_state *bt, u32 frame) 4265 { 4266 return bt->reg_masks[frame]; 4267 } 4268 4269 static inline u32 bt_reg_mask(struct backtrack_state *bt) 4270 { 4271 return bt->reg_masks[bt->frame]; 4272 } 4273 4274 static inline u64 bt_frame_stack_mask(struct backtrack_state *bt, u32 frame) 4275 { 4276 return bt->stack_masks[frame]; 4277 } 4278 4279 static inline u64 bt_stack_mask(struct backtrack_state *bt) 4280 { 4281 return bt->stack_masks[bt->frame]; 4282 } 4283 4284 static inline bool bt_is_reg_set(struct backtrack_state *bt, u32 reg) 4285 { 4286 return bt->reg_masks[bt->frame] & (1 << reg); 4287 } 4288 4289 static inline bool bt_is_frame_reg_set(struct backtrack_state *bt, u32 frame, u32 reg) 4290 { 4291 return bt->reg_masks[frame] & (1 << reg); 4292 } 4293 4294 static inline bool 
bt_is_frame_slot_set(struct backtrack_state *bt, u32 frame, u32 slot) 4295 { 4296 return bt->stack_masks[frame] & (1ull << slot); 4297 } 4298 4299 /* format registers bitmask, e.g., "r0,r2,r4" for 0x15 mask */ 4300 static void fmt_reg_mask(char *buf, ssize_t buf_sz, u32 reg_mask) 4301 { 4302 DECLARE_BITMAP(mask, 64); 4303 bool first = true; 4304 int i, n; 4305 4306 buf[0] = '\0'; 4307 4308 bitmap_from_u64(mask, reg_mask); 4309 for_each_set_bit(i, mask, 32) { 4310 n = snprintf(buf, buf_sz, "%sr%d", first ? "" : ",", i); 4311 first = false; 4312 buf += n; 4313 buf_sz -= n; 4314 if (buf_sz < 0) 4315 break; 4316 } 4317 } 4318 /* format stack slots bitmask, e.g., "-8,-24,-40" for 0x15 mask */ 4319 void bpf_fmt_stack_mask(char *buf, ssize_t buf_sz, u64 stack_mask) 4320 { 4321 DECLARE_BITMAP(mask, 64); 4322 bool first = true; 4323 int i, n; 4324 4325 buf[0] = '\0'; 4326 4327 bitmap_from_u64(mask, stack_mask); 4328 for_each_set_bit(i, mask, 64) { 4329 n = snprintf(buf, buf_sz, "%s%d", first ? "" : ",", -(i + 1) * 8); 4330 first = false; 4331 buf += n; 4332 buf_sz -= n; 4333 if (buf_sz < 0) 4334 break; 4335 } 4336 } 4337 4338 /* If any register R in hist->linked_regs is marked as precise in bt, 4339 * do bt_set_frame_{reg,slot}(bt, R) for all registers in hist->linked_regs. 
4340 */ 4341 static void bt_sync_linked_regs(struct backtrack_state *bt, struct bpf_jmp_history_entry *hist) 4342 { 4343 struct linked_regs linked_regs; 4344 bool some_precise = false; 4345 int i; 4346 4347 if (!hist || hist->linked_regs == 0) 4348 return; 4349 4350 linked_regs_unpack(hist->linked_regs, &linked_regs); 4351 for (i = 0; i < linked_regs.cnt; ++i) { 4352 struct linked_reg *e = &linked_regs.entries[i]; 4353 4354 if ((e->is_reg && bt_is_frame_reg_set(bt, e->frameno, e->regno)) || 4355 (!e->is_reg && bt_is_frame_slot_set(bt, e->frameno, e->spi))) { 4356 some_precise = true; 4357 break; 4358 } 4359 } 4360 4361 if (!some_precise) 4362 return; 4363 4364 for (i = 0; i < linked_regs.cnt; ++i) { 4365 struct linked_reg *e = &linked_regs.entries[i]; 4366 4367 if (e->is_reg) 4368 bt_set_frame_reg(bt, e->frameno, e->regno); 4369 else 4370 bt_set_frame_slot(bt, e->frameno, e->spi); 4371 } 4372 } 4373 4374 /* For given verifier state backtrack_insn() is called from the last insn to 4375 * the first insn. Its purpose is to compute a bitmask of registers and 4376 * stack slots that needs precision in the parent verifier state. 4377 * 4378 * @idx is an index of the instruction we are currently processing; 4379 * @subseq_idx is an index of the subsequent instruction that: 4380 * - *would be* executed next, if jump history is viewed in forward order; 4381 * - *was* processed previously during backtracking. 
4382 */ 4383 static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx, 4384 struct bpf_jmp_history_entry *hist, struct backtrack_state *bt) 4385 { 4386 struct bpf_insn *insn = env->prog->insnsi + idx; 4387 u8 class = BPF_CLASS(insn->code); 4388 u8 opcode = BPF_OP(insn->code); 4389 u8 mode = BPF_MODE(insn->code); 4390 u32 dreg = insn->dst_reg; 4391 u32 sreg = insn->src_reg; 4392 u32 spi, i, fr; 4393 4394 if (insn->code == 0) 4395 return 0; 4396 if (env->log.level & BPF_LOG_LEVEL2) { 4397 fmt_reg_mask(env->tmp_str_buf, TMP_STR_BUF_LEN, bt_reg_mask(bt)); 4398 verbose(env, "mark_precise: frame%d: regs=%s ", 4399 bt->frame, env->tmp_str_buf); 4400 bpf_fmt_stack_mask(env->tmp_str_buf, TMP_STR_BUF_LEN, bt_stack_mask(bt)); 4401 verbose(env, "stack=%s before ", env->tmp_str_buf); 4402 verbose(env, "%d: ", idx); 4403 verbose_insn(env, insn); 4404 } 4405 4406 /* If there is a history record that some registers gained range at this insn, 4407 * propagate precision marks to those registers, so that bt_is_reg_set() 4408 * accounts for these registers. 4409 */ 4410 bt_sync_linked_regs(bt, hist); 4411 4412 if (class == BPF_ALU || class == BPF_ALU64) { 4413 if (!bt_is_reg_set(bt, dreg)) 4414 return 0; 4415 if (opcode == BPF_END || opcode == BPF_NEG) { 4416 /* sreg is reserved and unused 4417 * dreg still need precision before this insn 4418 */ 4419 return 0; 4420 } else if (opcode == BPF_MOV) { 4421 if (BPF_SRC(insn->code) == BPF_X) { 4422 /* dreg = sreg or dreg = (s8, s16, s32)sreg 4423 * dreg needs precision after this insn 4424 * sreg needs precision before this insn 4425 */ 4426 bt_clear_reg(bt, dreg); 4427 if (sreg != BPF_REG_FP) 4428 bt_set_reg(bt, sreg); 4429 } else { 4430 /* dreg = K 4431 * dreg needs precision after this insn. 4432 * Corresponding register is already marked 4433 * as precise=true in this verifier state. 
4434 * No further markings in parent are necessary 4435 */ 4436 bt_clear_reg(bt, dreg); 4437 } 4438 } else { 4439 if (BPF_SRC(insn->code) == BPF_X) { 4440 /* dreg += sreg 4441 * both dreg and sreg need precision 4442 * before this insn 4443 */ 4444 if (sreg != BPF_REG_FP) 4445 bt_set_reg(bt, sreg); 4446 } /* else dreg += K 4447 * dreg still needs precision before this insn 4448 */ 4449 } 4450 } else if (class == BPF_LDX || is_atomic_load_insn(insn)) { 4451 if (!bt_is_reg_set(bt, dreg)) 4452 return 0; 4453 bt_clear_reg(bt, dreg); 4454 4455 /* scalars can only be spilled into stack w/o losing precision. 4456 * Load from any other memory can be zero extended. 4457 * The desire to keep that precision is already indicated 4458 * by 'precise' mark in corresponding register of this state. 4459 * No further tracking necessary. 4460 */ 4461 if (!hist || !(hist->flags & INSN_F_STACK_ACCESS)) 4462 return 0; 4463 /* dreg = *(u64 *)[fp - off] was a fill from the stack. 4464 * that [fp - off] slot contains scalar that needs to be 4465 * tracked with precision 4466 */ 4467 spi = insn_stack_access_spi(hist->flags); 4468 fr = insn_stack_access_frameno(hist->flags); 4469 bt_set_frame_slot(bt, fr, spi); 4470 } else if (class == BPF_STX || class == BPF_ST) { 4471 if (bt_is_reg_set(bt, dreg)) 4472 /* stx & st shouldn't be using _scalar_ dst_reg 4473 * to access memory. It means backtracking 4474 * encountered a case of pointer subtraction. 
4475 */ 4476 return -ENOTSUPP; 4477 /* scalars can only be spilled into stack */ 4478 if (!hist || !(hist->flags & INSN_F_STACK_ACCESS)) 4479 return 0; 4480 spi = insn_stack_access_spi(hist->flags); 4481 fr = insn_stack_access_frameno(hist->flags); 4482 if (!bt_is_frame_slot_set(bt, fr, spi)) 4483 return 0; 4484 bt_clear_frame_slot(bt, fr, spi); 4485 if (class == BPF_STX) 4486 bt_set_reg(bt, sreg); 4487 } else if (class == BPF_JMP || class == BPF_JMP32) { 4488 if (bpf_pseudo_call(insn)) { 4489 int subprog_insn_idx, subprog; 4490 4491 subprog_insn_idx = idx + insn->imm + 1; 4492 subprog = find_subprog(env, subprog_insn_idx); 4493 if (subprog < 0) 4494 return -EFAULT; 4495 4496 if (subprog_is_global(env, subprog)) { 4497 /* check that jump history doesn't have any 4498 * extra instructions from subprog; the next 4499 * instruction after call to global subprog 4500 * should be literally next instruction in 4501 * caller program 4502 */ 4503 verifier_bug_if(idx + 1 != subseq_idx, env, 4504 "extra insn from subprog"); 4505 /* r1-r5 are invalidated after subprog call, 4506 * so for global func call it shouldn't be set 4507 * anymore 4508 */ 4509 if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) { 4510 verifier_bug(env, "global subprog unexpected regs %x", 4511 bt_reg_mask(bt)); 4512 return -EFAULT; 4513 } 4514 /* global subprog always sets R0 */ 4515 bt_clear_reg(bt, BPF_REG_0); 4516 return 0; 4517 } else { 4518 /* static subprog call instruction, which 4519 * means that we are exiting current subprog, 4520 * so only r1-r5 could be still requested as 4521 * precise, r0 and r6-r10 or any stack slot in 4522 * the current frame should be zero by now 4523 */ 4524 if (bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) { 4525 verifier_bug(env, "static subprog unexpected regs %x", 4526 bt_reg_mask(bt)); 4527 return -EFAULT; 4528 } 4529 /* we are now tracking register spills correctly, 4530 * so any instance of leftover slots is a bug 4531 */ 4532 if (bt_stack_mask(bt) != 0) { 4533 verifier_bug(env, 
4534 "static subprog leftover stack slots %llx", 4535 bt_stack_mask(bt)); 4536 return -EFAULT; 4537 } 4538 /* propagate r1-r5 to the caller */ 4539 for (i = BPF_REG_1; i <= BPF_REG_5; i++) { 4540 if (bt_is_reg_set(bt, i)) { 4541 bt_clear_reg(bt, i); 4542 bt_set_frame_reg(bt, bt->frame - 1, i); 4543 } 4544 } 4545 if (bt_subprog_exit(bt)) 4546 return -EFAULT; 4547 return 0; 4548 } 4549 } else if (is_sync_callback_calling_insn(insn) && idx != subseq_idx - 1) { 4550 /* exit from callback subprog to callback-calling helper or 4551 * kfunc call. Use idx/subseq_idx check to discern it from 4552 * straight line code backtracking. 4553 * Unlike the subprog call handling above, we shouldn't 4554 * propagate precision of r1-r5 (if any requested), as they are 4555 * not actually arguments passed directly to callback subprogs 4556 */ 4557 if (bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) { 4558 verifier_bug(env, "callback unexpected regs %x", 4559 bt_reg_mask(bt)); 4560 return -EFAULT; 4561 } 4562 if (bt_stack_mask(bt) != 0) { 4563 verifier_bug(env, "callback leftover stack slots %llx", 4564 bt_stack_mask(bt)); 4565 return -EFAULT; 4566 } 4567 /* clear r1-r5 in callback subprog's mask */ 4568 for (i = BPF_REG_1; i <= BPF_REG_5; i++) 4569 bt_clear_reg(bt, i); 4570 if (bt_subprog_exit(bt)) 4571 return -EFAULT; 4572 return 0; 4573 } else if (opcode == BPF_CALL) { 4574 /* kfunc with imm==0 is invalid and fixup_kfunc_call will 4575 * catch this error later. Make backtracking conservative 4576 * with ENOTSUPP. 4577 */ 4578 if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL && insn->imm == 0) 4579 return -ENOTSUPP; 4580 /* regular helper call sets R0 */ 4581 bt_clear_reg(bt, BPF_REG_0); 4582 if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) { 4583 /* if backtracking was looking for registers R1-R5 4584 * they should have been found already. 
4585 */ 4586 verifier_bug(env, "backtracking call unexpected regs %x", 4587 bt_reg_mask(bt)); 4588 return -EFAULT; 4589 } 4590 if (insn->src_reg == BPF_REG_0 && insn->imm == BPF_FUNC_tail_call 4591 && subseq_idx - idx != 1) { 4592 if (bt_subprog_enter(bt)) 4593 return -EFAULT; 4594 } 4595 } else if (opcode == BPF_EXIT) { 4596 bool r0_precise; 4597 4598 /* Backtracking to a nested function call, 'idx' is a part of 4599 * the inner frame 'subseq_idx' is a part of the outer frame. 4600 * In case of a regular function call, instructions giving 4601 * precision to registers R1-R5 should have been found already. 4602 * In case of a callback, it is ok to have R1-R5 marked for 4603 * backtracking, as these registers are set by the function 4604 * invoking callback. 4605 */ 4606 if (subseq_idx >= 0 && bpf_calls_callback(env, subseq_idx)) 4607 for (i = BPF_REG_1; i <= BPF_REG_5; i++) 4608 bt_clear_reg(bt, i); 4609 if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) { 4610 verifier_bug(env, "backtracking exit unexpected regs %x", 4611 bt_reg_mask(bt)); 4612 return -EFAULT; 4613 } 4614 4615 /* BPF_EXIT in subprog or callback always returns 4616 * right after the call instruction, so by checking 4617 * whether the instruction at subseq_idx-1 is subprog 4618 * call or not we can distinguish actual exit from 4619 * *subprog* from exit from *callback*. In the former 4620 * case, we need to propagate r0 precision, if 4621 * necessary. In the former we never do that. 
4622 */ 4623 r0_precise = subseq_idx - 1 >= 0 && 4624 bpf_pseudo_call(&env->prog->insnsi[subseq_idx - 1]) && 4625 bt_is_reg_set(bt, BPF_REG_0); 4626 4627 bt_clear_reg(bt, BPF_REG_0); 4628 if (bt_subprog_enter(bt)) 4629 return -EFAULT; 4630 4631 if (r0_precise) 4632 bt_set_reg(bt, BPF_REG_0); 4633 /* r6-r9 and stack slots will stay set in caller frame 4634 * bitmasks until we return back from callee(s) 4635 */ 4636 return 0; 4637 } else if (BPF_SRC(insn->code) == BPF_X) { 4638 if (!bt_is_reg_set(bt, dreg) && !bt_is_reg_set(bt, sreg)) 4639 return 0; 4640 /* dreg <cond> sreg 4641 * Both dreg and sreg need precision before 4642 * this insn. If only sreg was marked precise 4643 * before it would be equally necessary to 4644 * propagate it to dreg. 4645 */ 4646 if (!hist || !(hist->flags & INSN_F_SRC_REG_STACK)) 4647 bt_set_reg(bt, sreg); 4648 if (!hist || !(hist->flags & INSN_F_DST_REG_STACK)) 4649 bt_set_reg(bt, dreg); 4650 } else if (BPF_SRC(insn->code) == BPF_K) { 4651 /* dreg <cond> K 4652 * Only dreg still needs precision before 4653 * this insn, so for the K-based conditional 4654 * there is nothing new to be marked. 4655 */ 4656 } 4657 } else if (class == BPF_LD) { 4658 if (!bt_is_reg_set(bt, dreg)) 4659 return 0; 4660 bt_clear_reg(bt, dreg); 4661 /* It's ld_imm64 or ld_abs or ld_ind. 4662 * For ld_imm64 no further tracking of precision 4663 * into parent is necessary 4664 */ 4665 if (mode == BPF_IND || mode == BPF_ABS) 4666 /* to be analyzed */ 4667 return -ENOTSUPP; 4668 } 4669 /* Propagate precision marks to linked registers, to account for 4670 * registers marked as precise in this function. 4671 */ 4672 bt_sync_linked_regs(bt, hist); 4673 return 0; 4674 } 4675 4676 /* the scalar precision tracking algorithm: 4677 * . at the start all registers have precise=false. 4678 * . scalar ranges are tracked as normal through alu and jmp insns. 4679 * . once precise value of the scalar register is used in: 4680 * . ptr + scalar alu 4681 * . 
if (scalar cond K|scalar)
 *   .  helper_call(.., scalar, ...) where ARG_CONST is expected
 *   backtrack through the verifier states and mark all registers and
 *   stack slots with spilled constants that these scalar registers
 *   should be precise.
 * . during state pruning two registers (or spilled stack slots)
 *   are equivalent if both are not precise.
 *
 * Note the verifier cannot simply walk the register parentage chain,
 * since many different registers and stack slots could have been
 * used to compute a single precise scalar.
 *
 * The approach of starting with precise=true for all registers and then
 * backtracking to mark a register as not precise when the verifier detects
 * that the program doesn't care about a specific value (e.g., when a helper
 * takes the register as an ARG_ANYTHING parameter) is not safe.
 *
 * It's ok to walk a single parentage chain of the verifier states.
 * It's possible that this backtracking will go all the way till 1st insn.
 * All other branches will be explored for needing precision later.
 *
 * The backtracking needs to deal with cases like:
 *   R8=map_value(id=0,off=0,ks=4,vs=1952,imm=0) R9_w=map_value(id=0,off=40,ks=4,vs=1952,imm=0)
 *   r9 -= r8
 *   r5 = r9
 *   if r5 > 0x79f goto pc+7
 *   R5_w=inv(id=0,umax_value=1951,var_off=(0x0; 0x7ff))
 *   r5 += 1
 *   ...
 *   call bpf_perf_event_output#25
 *   where .arg5_type = ARG_CONST_SIZE_OR_ZERO
 *
 * and this case:
 *   r6 = 1
 *   call foo // uses callee's r6 inside to compute r0
 *   r0 += r6
 *   if r0 == 0 goto
 *
 * To track the above, reg_mask/stack_mask need to be independent for each frame.
 *
 * Also if parent's curframe > frame where backtracking started,
 * the verifier needs to mark registers in both frames, otherwise callees
 * may incorrectly prune callers.
This is similar to 4724 * commit 7640ead93924 ("bpf: verifier: make sure callees don't prune with caller differences") 4725 * 4726 * For now backtracking falls back into conservative marking. 4727 */ 4728 static void mark_all_scalars_precise(struct bpf_verifier_env *env, 4729 struct bpf_verifier_state *st) 4730 { 4731 struct bpf_func_state *func; 4732 struct bpf_reg_state *reg; 4733 int i, j; 4734 4735 if (env->log.level & BPF_LOG_LEVEL2) { 4736 verbose(env, "mark_precise: frame%d: falling back to forcing all scalars precise\n", 4737 st->curframe); 4738 } 4739 4740 /* big hammer: mark all scalars precise in this path. 4741 * pop_stack may still get !precise scalars. 4742 * We also skip current state and go straight to first parent state, 4743 * because precision markings in current non-checkpointed state are 4744 * not needed. See why in the comment in __mark_chain_precision below. 4745 */ 4746 for (st = st->parent; st; st = st->parent) { 4747 for (i = 0; i <= st->curframe; i++) { 4748 func = st->frame[i]; 4749 for (j = 0; j < BPF_REG_FP; j++) { 4750 reg = &func->regs[j]; 4751 if (reg->type != SCALAR_VALUE || reg->precise) 4752 continue; 4753 reg->precise = true; 4754 if (env->log.level & BPF_LOG_LEVEL2) { 4755 verbose(env, "force_precise: frame%d: forcing r%d to be precise\n", 4756 i, j); 4757 } 4758 } 4759 for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) { 4760 if (!is_spilled_reg(&func->stack[j])) 4761 continue; 4762 reg = &func->stack[j].spilled_ptr; 4763 if (reg->type != SCALAR_VALUE || reg->precise) 4764 continue; 4765 reg->precise = true; 4766 if (env->log.level & BPF_LOG_LEVEL2) { 4767 verbose(env, "force_precise: frame%d: forcing fp%d to be precise\n", 4768 i, -(j + 1) * 8); 4769 } 4770 } 4771 } 4772 } 4773 } 4774 4775 static void mark_all_scalars_imprecise(struct bpf_verifier_env *env, struct bpf_verifier_state *st) 4776 { 4777 struct bpf_func_state *func; 4778 struct bpf_reg_state *reg; 4779 int i, j; 4780 4781 for (i = 0; i <= st->curframe; 
i++) { 4782 func = st->frame[i]; 4783 for (j = 0; j < BPF_REG_FP; j++) { 4784 reg = &func->regs[j]; 4785 if (reg->type != SCALAR_VALUE) 4786 continue; 4787 reg->precise = false; 4788 } 4789 for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) { 4790 if (!is_spilled_reg(&func->stack[j])) 4791 continue; 4792 reg = &func->stack[j].spilled_ptr; 4793 if (reg->type != SCALAR_VALUE) 4794 continue; 4795 reg->precise = false; 4796 } 4797 } 4798 } 4799 4800 /* 4801 * __mark_chain_precision() backtracks BPF program instruction sequence and 4802 * chain of verifier states making sure that register *regno* (if regno >= 0) 4803 * and/or stack slot *spi* (if spi >= 0) are marked as precisely tracked 4804 * SCALARS, as well as any other registers and slots that contribute to 4805 * a tracked state of given registers/stack slots, depending on specific BPF 4806 * assembly instructions (see backtrack_insns() for exact instruction handling 4807 * logic). This backtracking relies on recorded jmp_history and is able to 4808 * traverse entire chain of parent states. This process ends only when all the 4809 * necessary registers/slots and their transitive dependencies are marked as 4810 * precise. 4811 * 4812 * One important and subtle aspect is that precise marks *do not matter* in 4813 * the currently verified state (current state). It is important to understand 4814 * why this is the case. 4815 * 4816 * First, note that current state is the state that is not yet "checkpointed", 4817 * i.e., it is not yet put into env->explored_states, and it has no children 4818 * states as well. It's ephemeral, and can end up either a) being discarded if 4819 * compatible explored state is found at some point or BPF_EXIT instruction is 4820 * reached or b) checkpointed and put into env->explored_states, branching out 4821 * into one or more children states. 
 *
 * In the former case, precise markings in current state are completely
 * ignored by state comparison code (see regsafe() for details). Only
 * checkpointed ("old") state precise markings are important, and if old
 * state's register/slot is precise, regsafe() assumes current state's
 * register/slot as precise and checks value ranges exactly and precisely. If
 * states turn out to be compatible, current state's necessary precise
 * markings and any required parent states' precise markings are enforced
 * after the fact with propagate_precision() logic. But it's
 * important to realize that in this case, even after marking current state
 * registers/slots as precise, we immediately discard current state. So what
 * actually matters is any of the precise markings propagated into current
 * state's parent states, which are always checkpointed (due to b) case above).
 * As such, for scenario a) it doesn't matter if current state has precise
 * markings set or not.
 *
 * Now, for the scenario b), checkpointing and forking into child(ren)
 * state(s). Note that before current state gets to checkpointing step, any
 * processed instruction always assumes precise SCALAR register/slot
 * knowledge: if precise value or range is useful to prune jump branch, BPF
 * verifier takes this opportunity enthusiastically. Similarly, when
 * register's value is used to calculate offset or memory address, exact
 * knowledge of SCALAR range is assumed, checked, and enforced. So, similar to
 * what we mentioned above about state comparison ignoring precise markings,
 * BPF verifier ignores and also assumes precise
 * markings *at will* during instruction verification process.
But as verifier
 * assumes precision, it also propagates any precision dependencies across
 * parent states, which are not yet finalized, so can be further restricted
 * based on new knowledge gained from restrictions enforced by their children
 * states. This is so that once those parent states are finalized, i.e., when
 * they have no more active child states, state comparison logic in
 * is_state_visited() would enforce strict and precise SCALAR ranges, if
 * required for correctness.
 *
 * To build a bit more intuition, note also that once a state is checkpointed,
 * the path we took to get to that state is not important. This is a crucial
 * property for state pruning. When state is checkpointed and finalized at
 * some instruction index, it can be correctly and safely used to "short
 * circuit" any *compatible* state that reaches exactly the same instruction
 * index. I.e., if we jumped to that instruction from a completely different
 * code path than original finalized state was derived from, it doesn't
 * matter, current state can be discarded because from that instruction
 * forward having a compatible state will ensure we will safely reach the
 * exit. States describe preconditions for further exploration, but completely
 * forget the history of how we got here.
 *
 * This also means that even if we needed precise SCALAR range to get to
 * finalized state, but from that point forward *that same* SCALAR register is
 * never used in a precise context (i.e., its precise value is not needed for
 * correctness), it's correct and safe to mark such register as "imprecise"
 * (i.e., precise marking set to false). This is what we rely on when we do
 * not set precise marking in current state. If no child state requires
 * precision for any given SCALAR register, it's safe to dictate that it can
 * be imprecise.
If any child state does require this register to be precise, 4876 * we'll mark it precise later retroactively during precise markings 4877 * propagation from child state to parent states. 4878 * 4879 * Skipping precise marking setting in current state is a mild version of 4880 * relying on the above observation. But we can utilize this property even 4881 * more aggressively by proactively forgetting any precise marking in the 4882 * current state (which we inherited from the parent state), right before we 4883 * checkpoint it and branch off into new child state. This is done by 4884 * mark_all_scalars_imprecise() to hopefully get more permissive and generic 4885 * finalized states which help in short circuiting more future states. 4886 */ 4887 static int __mark_chain_precision(struct bpf_verifier_env *env, 4888 struct bpf_verifier_state *starting_state, 4889 int regno, 4890 bool *changed) 4891 { 4892 struct bpf_verifier_state *st = starting_state; 4893 struct backtrack_state *bt = &env->bt; 4894 int first_idx = st->first_insn_idx; 4895 int last_idx = starting_state->insn_idx; 4896 int subseq_idx = -1; 4897 struct bpf_func_state *func; 4898 bool tmp, skip_first = true; 4899 struct bpf_reg_state *reg; 4900 int i, fr, err; 4901 4902 if (!env->bpf_capable) 4903 return 0; 4904 4905 changed = changed ?: &tmp; 4906 /* set frame number from which we are starting to backtrack */ 4907 bt_init(bt, starting_state->curframe); 4908 4909 /* Do sanity checks against current state of register and/or stack 4910 * slot, but don't set precise flag in current state, as precision 4911 * tracking in the current state is unnecessary. 
4912 */ 4913 func = st->frame[bt->frame]; 4914 if (regno >= 0) { 4915 reg = &func->regs[regno]; 4916 if (reg->type != SCALAR_VALUE) { 4917 verifier_bug(env, "backtracking misuse"); 4918 return -EFAULT; 4919 } 4920 bt_set_reg(bt, regno); 4921 } 4922 4923 if (bt_empty(bt)) 4924 return 0; 4925 4926 for (;;) { 4927 DECLARE_BITMAP(mask, 64); 4928 u32 history = st->jmp_history_cnt; 4929 struct bpf_jmp_history_entry *hist; 4930 4931 if (env->log.level & BPF_LOG_LEVEL2) { 4932 verbose(env, "mark_precise: frame%d: last_idx %d first_idx %d subseq_idx %d \n", 4933 bt->frame, last_idx, first_idx, subseq_idx); 4934 } 4935 4936 if (last_idx < 0) { 4937 /* we are at the entry into subprog, which 4938 * is expected for global funcs, but only if 4939 * requested precise registers are R1-R5 4940 * (which are global func's input arguments) 4941 */ 4942 if (st->curframe == 0 && 4943 st->frame[0]->subprogno > 0 && 4944 st->frame[0]->callsite == BPF_MAIN_FUNC && 4945 bt_stack_mask(bt) == 0 && 4946 (bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) == 0) { 4947 bitmap_from_u64(mask, bt_reg_mask(bt)); 4948 for_each_set_bit(i, mask, 32) { 4949 reg = &st->frame[0]->regs[i]; 4950 bt_clear_reg(bt, i); 4951 if (reg->type == SCALAR_VALUE) { 4952 reg->precise = true; 4953 *changed = true; 4954 } 4955 } 4956 return 0; 4957 } 4958 4959 verifier_bug(env, "backtracking func entry subprog %d reg_mask %x stack_mask %llx", 4960 st->frame[0]->subprogno, bt_reg_mask(bt), bt_stack_mask(bt)); 4961 return -EFAULT; 4962 } 4963 4964 for (i = last_idx;;) { 4965 if (skip_first) { 4966 err = 0; 4967 skip_first = false; 4968 } else { 4969 hist = get_jmp_hist_entry(st, history, i); 4970 err = backtrack_insn(env, i, subseq_idx, hist, bt); 4971 } 4972 if (err == -ENOTSUPP) { 4973 mark_all_scalars_precise(env, starting_state); 4974 bt_reset(bt); 4975 return 0; 4976 } else if (err) { 4977 return err; 4978 } 4979 if (bt_empty(bt)) 4980 /* Found assignment(s) into tracked register in this state. 
4981 * Since this state is already marked, just return. 4982 * Nothing to be tracked further in the parent state. 4983 */ 4984 return 0; 4985 subseq_idx = i; 4986 i = get_prev_insn_idx(st, i, &history); 4987 if (i == -ENOENT) 4988 break; 4989 if (i >= env->prog->len) { 4990 /* This can happen if backtracking reached insn 0 4991 * and there are still reg_mask or stack_mask 4992 * to backtrack. 4993 * It means the backtracking missed the spot where 4994 * particular register was initialized with a constant. 4995 */ 4996 verifier_bug(env, "backtracking idx %d", i); 4997 return -EFAULT; 4998 } 4999 } 5000 st = st->parent; 5001 if (!st) 5002 break; 5003 5004 for (fr = bt->frame; fr >= 0; fr--) { 5005 func = st->frame[fr]; 5006 bitmap_from_u64(mask, bt_frame_reg_mask(bt, fr)); 5007 for_each_set_bit(i, mask, 32) { 5008 reg = &func->regs[i]; 5009 if (reg->type != SCALAR_VALUE) { 5010 bt_clear_frame_reg(bt, fr, i); 5011 continue; 5012 } 5013 if (reg->precise) { 5014 bt_clear_frame_reg(bt, fr, i); 5015 } else { 5016 reg->precise = true; 5017 *changed = true; 5018 } 5019 } 5020 5021 bitmap_from_u64(mask, bt_frame_stack_mask(bt, fr)); 5022 for_each_set_bit(i, mask, 64) { 5023 if (verifier_bug_if(i >= func->allocated_stack / BPF_REG_SIZE, 5024 env, "stack slot %d, total slots %d", 5025 i, func->allocated_stack / BPF_REG_SIZE)) 5026 return -EFAULT; 5027 5028 if (!is_spilled_scalar_reg(&func->stack[i])) { 5029 bt_clear_frame_slot(bt, fr, i); 5030 continue; 5031 } 5032 reg = &func->stack[i].spilled_ptr; 5033 if (reg->precise) { 5034 bt_clear_frame_slot(bt, fr, i); 5035 } else { 5036 reg->precise = true; 5037 *changed = true; 5038 } 5039 } 5040 if (env->log.level & BPF_LOG_LEVEL2) { 5041 fmt_reg_mask(env->tmp_str_buf, TMP_STR_BUF_LEN, 5042 bt_frame_reg_mask(bt, fr)); 5043 verbose(env, "mark_precise: frame%d: parent state regs=%s ", 5044 fr, env->tmp_str_buf); 5045 bpf_fmt_stack_mask(env->tmp_str_buf, TMP_STR_BUF_LEN, 5046 bt_frame_stack_mask(bt, fr)); 5047 verbose(env, "stack=%s: 
", env->tmp_str_buf); 5048 print_verifier_state(env, st, fr, true); 5049 } 5050 } 5051 5052 if (bt_empty(bt)) 5053 return 0; 5054 5055 subseq_idx = first_idx; 5056 last_idx = st->last_insn_idx; 5057 first_idx = st->first_insn_idx; 5058 } 5059 5060 /* if we still have requested precise regs or slots, we missed 5061 * something (e.g., stack access through non-r10 register), so 5062 * fallback to marking all precise 5063 */ 5064 if (!bt_empty(bt)) { 5065 mark_all_scalars_precise(env, starting_state); 5066 bt_reset(bt); 5067 } 5068 5069 return 0; 5070 } 5071 5072 int mark_chain_precision(struct bpf_verifier_env *env, int regno) 5073 { 5074 return __mark_chain_precision(env, env->cur_state, regno, NULL); 5075 } 5076 5077 /* mark_chain_precision_batch() assumes that env->bt is set in the caller to 5078 * desired reg and stack masks across all relevant frames 5079 */ 5080 static int mark_chain_precision_batch(struct bpf_verifier_env *env, 5081 struct bpf_verifier_state *starting_state) 5082 { 5083 return __mark_chain_precision(env, starting_state, -1, NULL); 5084 } 5085 5086 static bool is_spillable_regtype(enum bpf_reg_type type) 5087 { 5088 switch (base_type(type)) { 5089 case PTR_TO_MAP_VALUE: 5090 case PTR_TO_STACK: 5091 case PTR_TO_CTX: 5092 case PTR_TO_PACKET: 5093 case PTR_TO_PACKET_META: 5094 case PTR_TO_PACKET_END: 5095 case PTR_TO_FLOW_KEYS: 5096 case CONST_PTR_TO_MAP: 5097 case PTR_TO_SOCKET: 5098 case PTR_TO_SOCK_COMMON: 5099 case PTR_TO_TCP_SOCK: 5100 case PTR_TO_XDP_SOCK: 5101 case PTR_TO_BTF_ID: 5102 case PTR_TO_BUF: 5103 case PTR_TO_MEM: 5104 case PTR_TO_FUNC: 5105 case PTR_TO_MAP_KEY: 5106 case PTR_TO_ARENA: 5107 return true; 5108 default: 5109 return false; 5110 } 5111 } 5112 5113 /* Does this register contain a constant zero? 
*/ 5114 static bool register_is_null(struct bpf_reg_state *reg) 5115 { 5116 return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0); 5117 } 5118 5119 /* check if register is a constant scalar value */ 5120 static bool is_reg_const(struct bpf_reg_state *reg, bool subreg32) 5121 { 5122 return reg->type == SCALAR_VALUE && 5123 tnum_is_const(subreg32 ? tnum_subreg(reg->var_off) : reg->var_off); 5124 } 5125 5126 /* assuming is_reg_const() is true, return constant value of a register */ 5127 static u64 reg_const_value(struct bpf_reg_state *reg, bool subreg32) 5128 { 5129 return subreg32 ? tnum_subreg(reg->var_off).value : reg->var_off.value; 5130 } 5131 5132 static bool __is_pointer_value(bool allow_ptr_leaks, 5133 const struct bpf_reg_state *reg) 5134 { 5135 if (allow_ptr_leaks) 5136 return false; 5137 5138 return reg->type != SCALAR_VALUE; 5139 } 5140 5141 static void assign_scalar_id_before_mov(struct bpf_verifier_env *env, 5142 struct bpf_reg_state *src_reg) 5143 { 5144 if (src_reg->type != SCALAR_VALUE) 5145 return; 5146 5147 if (src_reg->id & BPF_ADD_CONST) { 5148 /* 5149 * The verifier is processing rX = rY insn and 5150 * rY->id has special linked register already. 5151 * Cleared it, since multiple rX += const are not supported. 5152 */ 5153 src_reg->id = 0; 5154 src_reg->off = 0; 5155 } 5156 5157 if (!src_reg->id && !tnum_is_const(src_reg->var_off)) 5158 /* Ensure that src_reg has a valid ID that will be copied to 5159 * dst_reg and then will be used by sync_linked_regs() to 5160 * propagate min/max range. 
5161 */ 5162 src_reg->id = ++env->id_gen; 5163 } 5164 5165 /* Copy src state preserving dst->parent and dst->live fields */ 5166 static void copy_register_state(struct bpf_reg_state *dst, const struct bpf_reg_state *src) 5167 { 5168 *dst = *src; 5169 } 5170 5171 static void save_register_state(struct bpf_verifier_env *env, 5172 struct bpf_func_state *state, 5173 int spi, struct bpf_reg_state *reg, 5174 int size) 5175 { 5176 int i; 5177 5178 copy_register_state(&state->stack[spi].spilled_ptr, reg); 5179 5180 for (i = BPF_REG_SIZE; i > BPF_REG_SIZE - size; i--) 5181 state->stack[spi].slot_type[i - 1] = STACK_SPILL; 5182 5183 /* size < 8 bytes spill */ 5184 for (; i; i--) 5185 mark_stack_slot_misc(env, &state->stack[spi].slot_type[i - 1]); 5186 } 5187 5188 static bool is_bpf_st_mem(struct bpf_insn *insn) 5189 { 5190 return BPF_CLASS(insn->code) == BPF_ST && BPF_MODE(insn->code) == BPF_MEM; 5191 } 5192 5193 static int get_reg_width(struct bpf_reg_state *reg) 5194 { 5195 return fls64(reg->umax_value); 5196 } 5197 5198 /* See comment for mark_fastcall_pattern_for_call() */ 5199 static void check_fastcall_stack_contract(struct bpf_verifier_env *env, 5200 struct bpf_func_state *state, int insn_idx, int off) 5201 { 5202 struct bpf_subprog_info *subprog = &env->subprog_info[state->subprogno]; 5203 struct bpf_insn_aux_data *aux = env->insn_aux_data; 5204 int i; 5205 5206 if (subprog->fastcall_stack_off <= off || aux[insn_idx].fastcall_pattern) 5207 return; 5208 /* access to the region [max_stack_depth .. fastcall_stack_off) 5209 * from something that is not a part of the fastcall pattern, 5210 * disable fastcall rewrites for current subprogram by setting 5211 * fastcall_stack_off to a value smaller than any possible offset. 
5212 */ 5213 subprog->fastcall_stack_off = S16_MIN; 5214 /* reset fastcall aux flags within subprogram, 5215 * happens at most once per subprogram 5216 */ 5217 for (i = subprog->start; i < (subprog + 1)->start; ++i) { 5218 aux[i].fastcall_spills_num = 0; 5219 aux[i].fastcall_pattern = 0; 5220 } 5221 } 5222 5223 /* check_stack_{read,write}_fixed_off functions track spill/fill of registers, 5224 * stack boundary and alignment are checked in check_mem_access() 5225 */ 5226 static int check_stack_write_fixed_off(struct bpf_verifier_env *env, 5227 /* stack frame we're writing to */ 5228 struct bpf_func_state *state, 5229 int off, int size, int value_regno, 5230 int insn_idx) 5231 { 5232 struct bpf_func_state *cur; /* state of the current function */ 5233 int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err; 5234 struct bpf_insn *insn = &env->prog->insnsi[insn_idx]; 5235 struct bpf_reg_state *reg = NULL; 5236 int insn_flags = insn_stack_access_flags(state->frameno, spi); 5237 5238 /* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0, 5239 * so it's aligned access and [off, off + size) are within stack limits 5240 */ 5241 if (!env->allow_ptr_leaks && 5242 is_spilled_reg(&state->stack[spi]) && 5243 !is_spilled_scalar_reg(&state->stack[spi]) && 5244 size != BPF_REG_SIZE) { 5245 verbose(env, "attempt to corrupt spilled pointer on stack\n"); 5246 return -EACCES; 5247 } 5248 5249 cur = env->cur_state->frame[env->cur_state->curframe]; 5250 if (value_regno >= 0) 5251 reg = &cur->regs[value_regno]; 5252 if (!env->bypass_spec_v4) { 5253 bool sanitize = reg && is_spillable_regtype(reg->type); 5254 5255 for (i = 0; i < size; i++) { 5256 u8 type = state->stack[spi].slot_type[i]; 5257 5258 if (type != STACK_MISC && type != STACK_ZERO) { 5259 sanitize = true; 5260 break; 5261 } 5262 } 5263 5264 if (sanitize) 5265 env->insn_aux_data[insn_idx].nospec_result = true; 5266 } 5267 5268 err = destroy_if_dynptr_stack_slot(env, state, spi); 5269 if (err) 5270 return err; 5271 
5272 if (!(off % BPF_REG_SIZE) && size == BPF_REG_SIZE) { 5273 /* only mark the slot as written if all 8 bytes were written 5274 * otherwise read propagation may incorrectly stop too soon 5275 * when stack slots are partially written. 5276 * This heuristic means that read propagation will be 5277 * conservative, since it will add reg_live_read marks 5278 * to stack slots all the way to first state when programs 5279 * writes+reads less than 8 bytes 5280 */ 5281 bpf_mark_stack_write(env, state->frameno, BIT(spi)); 5282 } 5283 5284 check_fastcall_stack_contract(env, state, insn_idx, off); 5285 mark_stack_slot_scratched(env, spi); 5286 if (reg && !(off % BPF_REG_SIZE) && reg->type == SCALAR_VALUE && env->bpf_capable) { 5287 bool reg_value_fits; 5288 5289 reg_value_fits = get_reg_width(reg) <= BITS_PER_BYTE * size; 5290 /* Make sure that reg had an ID to build a relation on spill. */ 5291 if (reg_value_fits) 5292 assign_scalar_id_before_mov(env, reg); 5293 save_register_state(env, state, spi, reg, size); 5294 /* Break the relation on a narrowing spill. 
*/ 5295 if (!reg_value_fits) 5296 state->stack[spi].spilled_ptr.id = 0; 5297 } else if (!reg && !(off % BPF_REG_SIZE) && is_bpf_st_mem(insn) && 5298 env->bpf_capable) { 5299 struct bpf_reg_state *tmp_reg = &env->fake_reg[0]; 5300 5301 memset(tmp_reg, 0, sizeof(*tmp_reg)); 5302 __mark_reg_known(tmp_reg, insn->imm); 5303 tmp_reg->type = SCALAR_VALUE; 5304 save_register_state(env, state, spi, tmp_reg, size); 5305 } else if (reg && is_spillable_regtype(reg->type)) { 5306 /* register containing pointer is being spilled into stack */ 5307 if (size != BPF_REG_SIZE) { 5308 verbose_linfo(env, insn_idx, "; "); 5309 verbose(env, "invalid size of register spill\n"); 5310 return -EACCES; 5311 } 5312 if (state != cur && reg->type == PTR_TO_STACK) { 5313 verbose(env, "cannot spill pointers to stack into stack frame of the caller\n"); 5314 return -EINVAL; 5315 } 5316 save_register_state(env, state, spi, reg, size); 5317 } else { 5318 u8 type = STACK_MISC; 5319 5320 /* regular write of data into stack destroys any spilled ptr */ 5321 state->stack[spi].spilled_ptr.type = NOT_INIT; 5322 /* Mark slots as STACK_MISC if they belonged to spilled ptr/dynptr/iter. */ 5323 if (is_stack_slot_special(&state->stack[spi])) 5324 for (i = 0; i < BPF_REG_SIZE; i++) 5325 scrub_spilled_slot(&state->stack[spi].slot_type[i]); 5326 5327 /* when we zero initialize stack slots mark them as such */ 5328 if ((reg && register_is_null(reg)) || 5329 (!reg && is_bpf_st_mem(insn) && insn->imm == 0)) { 5330 /* STACK_ZERO case happened because register spill 5331 * wasn't properly aligned at the stack slot boundary, 5332 * so it's not a register spill anymore; force 5333 * originating register to be precise to make 5334 * STACK_ZERO correct for subsequent states 5335 */ 5336 err = mark_chain_precision(env, value_regno); 5337 if (err) 5338 return err; 5339 type = STACK_ZERO; 5340 } 5341 5342 /* Mark slots affected by this stack write. 
*/ 5343 for (i = 0; i < size; i++) 5344 state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] = type; 5345 insn_flags = 0; /* not a register spill */ 5346 } 5347 5348 if (insn_flags) 5349 return push_jmp_history(env, env->cur_state, insn_flags, 0); 5350 return 0; 5351 } 5352 5353 /* Write the stack: 'stack[ptr_regno + off] = value_regno'. 'ptr_regno' is 5354 * known to contain a variable offset. 5355 * This function checks whether the write is permitted and conservatively 5356 * tracks the effects of the write, considering that each stack slot in the 5357 * dynamic range is potentially written to. 5358 * 5359 * 'off' includes 'regno->off'. 5360 * 'value_regno' can be -1, meaning that an unknown value is being written to 5361 * the stack. 5362 * 5363 * Spilled pointers in range are not marked as written because we don't know 5364 * what's going to be actually written. This means that read propagation for 5365 * future reads cannot be terminated by this write. 5366 * 5367 * For privileged programs, uninitialized stack slots are considered 5368 * initialized by this write (even though we don't know exactly what offsets 5369 * are going to be written to). The idea is that we don't want the verifier to 5370 * reject future reads that access slots written to through variable offsets. 
5371 */ 5372 static int check_stack_write_var_off(struct bpf_verifier_env *env, 5373 /* func where register points to */ 5374 struct bpf_func_state *state, 5375 int ptr_regno, int off, int size, 5376 int value_regno, int insn_idx) 5377 { 5378 struct bpf_func_state *cur; /* state of the current function */ 5379 int min_off, max_off; 5380 int i, err; 5381 struct bpf_reg_state *ptr_reg = NULL, *value_reg = NULL; 5382 struct bpf_insn *insn = &env->prog->insnsi[insn_idx]; 5383 bool writing_zero = false; 5384 /* set if the fact that we're writing a zero is used to let any 5385 * stack slots remain STACK_ZERO 5386 */ 5387 bool zero_used = false; 5388 5389 cur = env->cur_state->frame[env->cur_state->curframe]; 5390 ptr_reg = &cur->regs[ptr_regno]; 5391 min_off = ptr_reg->smin_value + off; 5392 max_off = ptr_reg->smax_value + off + size; 5393 if (value_regno >= 0) 5394 value_reg = &cur->regs[value_regno]; 5395 if ((value_reg && register_is_null(value_reg)) || 5396 (!value_reg && is_bpf_st_mem(insn) && insn->imm == 0)) 5397 writing_zero = true; 5398 5399 for (i = min_off; i < max_off; i++) { 5400 int spi; 5401 5402 spi = __get_spi(i); 5403 err = destroy_if_dynptr_stack_slot(env, state, spi); 5404 if (err) 5405 return err; 5406 } 5407 5408 check_fastcall_stack_contract(env, state, insn_idx, min_off); 5409 /* Variable offset writes destroy any spilled pointers in range. */ 5410 for (i = min_off; i < max_off; i++) { 5411 u8 new_type, *stype; 5412 int slot, spi; 5413 5414 slot = -i - 1; 5415 spi = slot / BPF_REG_SIZE; 5416 stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE]; 5417 mark_stack_slot_scratched(env, spi); 5418 5419 if (!env->allow_ptr_leaks && *stype != STACK_MISC && *stype != STACK_ZERO) { 5420 /* Reject the write if range we may write to has not 5421 * been initialized beforehand. If we didn't reject 5422 * here, the ptr status would be erased below (even 5423 * though not all slots are actually overwritten), 5424 * possibly opening the door to leaks. 
5425 * 5426 * We do however catch STACK_INVALID case below, and 5427 * only allow reading possibly uninitialized memory 5428 * later for CAP_PERFMON, as the write may not happen to 5429 * that slot. 5430 */ 5431 verbose(env, "spilled ptr in range of var-offset stack write; insn %d, ptr off: %d", 5432 insn_idx, i); 5433 return -EINVAL; 5434 } 5435 5436 /* If writing_zero and the spi slot contains a spill of value 0, 5437 * maintain the spill type. 5438 */ 5439 if (writing_zero && *stype == STACK_SPILL && 5440 is_spilled_scalar_reg(&state->stack[spi])) { 5441 struct bpf_reg_state *spill_reg = &state->stack[spi].spilled_ptr; 5442 5443 if (tnum_is_const(spill_reg->var_off) && spill_reg->var_off.value == 0) { 5444 zero_used = true; 5445 continue; 5446 } 5447 } 5448 5449 /* Erase all other spilled pointers. */ 5450 state->stack[spi].spilled_ptr.type = NOT_INIT; 5451 5452 /* Update the slot type. */ 5453 new_type = STACK_MISC; 5454 if (writing_zero && *stype == STACK_ZERO) { 5455 new_type = STACK_ZERO; 5456 zero_used = true; 5457 } 5458 /* If the slot is STACK_INVALID, we check whether it's OK to 5459 * pretend that it will be initialized by this write. The slot 5460 * might not actually be written to, and so if we mark it as 5461 * initialized future reads might leak uninitialized memory. 5462 * For privileged programs, we will accept such reads to slots 5463 * that may or may not be written because, if we're reject 5464 * them, the error would be too confusing. 5465 */ 5466 if (*stype == STACK_INVALID && !env->allow_uninit_stack) { 5467 verbose(env, "uninit stack in range of var-offset write prohibited for !root; insn %d, off: %d", 5468 insn_idx, i); 5469 return -EINVAL; 5470 } 5471 *stype = new_type; 5472 } 5473 if (zero_used) { 5474 /* backtracking doesn't work for STACK_ZERO yet. 
*/ 5475 err = mark_chain_precision(env, value_regno); 5476 if (err) 5477 return err; 5478 } 5479 return 0; 5480 } 5481 5482 /* When register 'dst_regno' is assigned some values from stack[min_off, 5483 * max_off), we set the register's type according to the types of the 5484 * respective stack slots. If all the stack values are known to be zeros, then 5485 * so is the destination reg. Otherwise, the register is considered to be 5486 * SCALAR. This function does not deal with register filling; the caller must 5487 * ensure that all spilled registers in the stack range have been marked as 5488 * read. 5489 */ 5490 static void mark_reg_stack_read(struct bpf_verifier_env *env, 5491 /* func where src register points to */ 5492 struct bpf_func_state *ptr_state, 5493 int min_off, int max_off, int dst_regno) 5494 { 5495 struct bpf_verifier_state *vstate = env->cur_state; 5496 struct bpf_func_state *state = vstate->frame[vstate->curframe]; 5497 int i, slot, spi; 5498 u8 *stype; 5499 int zeros = 0; 5500 5501 for (i = min_off; i < max_off; i++) { 5502 slot = -i - 1; 5503 spi = slot / BPF_REG_SIZE; 5504 mark_stack_slot_scratched(env, spi); 5505 stype = ptr_state->stack[spi].slot_type; 5506 if (stype[slot % BPF_REG_SIZE] != STACK_ZERO) 5507 break; 5508 zeros++; 5509 } 5510 if (zeros == max_off - min_off) { 5511 /* Any access_size read into register is zero extended, 5512 * so the whole register == const_zero. 5513 */ 5514 __mark_reg_const_zero(env, &state->regs[dst_regno]); 5515 } else { 5516 /* have read misc data from the stack */ 5517 mark_reg_unknown(env, state->regs, dst_regno); 5518 } 5519 } 5520 5521 /* Read the stack at 'off' and put the results into the register indicated by 5522 * 'dst_regno'. It handles reg filling if the addressed stack slot is a 5523 * spilled reg. 5524 * 5525 * 'dst_regno' can be -1, meaning that the read value is not going to a 5526 * register. 5527 * 5528 * The access is assumed to be within the current stack bounds. 
5529 */ 5530 static int check_stack_read_fixed_off(struct bpf_verifier_env *env, 5531 /* func where src register points to */ 5532 struct bpf_func_state *reg_state, 5533 int off, int size, int dst_regno) 5534 { 5535 struct bpf_verifier_state *vstate = env->cur_state; 5536 struct bpf_func_state *state = vstate->frame[vstate->curframe]; 5537 int i, slot = -off - 1, spi = slot / BPF_REG_SIZE; 5538 struct bpf_reg_state *reg; 5539 u8 *stype, type; 5540 int insn_flags = insn_stack_access_flags(reg_state->frameno, spi); 5541 int err; 5542 5543 stype = reg_state->stack[spi].slot_type; 5544 reg = ®_state->stack[spi].spilled_ptr; 5545 5546 mark_stack_slot_scratched(env, spi); 5547 check_fastcall_stack_contract(env, state, env->insn_idx, off); 5548 err = bpf_mark_stack_read(env, reg_state->frameno, env->insn_idx, BIT(spi)); 5549 if (err) 5550 return err; 5551 5552 if (is_spilled_reg(®_state->stack[spi])) { 5553 u8 spill_size = 1; 5554 5555 for (i = BPF_REG_SIZE - 1; i > 0 && stype[i - 1] == STACK_SPILL; i--) 5556 spill_size++; 5557 5558 if (size != BPF_REG_SIZE || spill_size != BPF_REG_SIZE) { 5559 if (reg->type != SCALAR_VALUE) { 5560 verbose_linfo(env, env->insn_idx, "; "); 5561 verbose(env, "invalid size of register fill\n"); 5562 return -EACCES; 5563 } 5564 5565 if (dst_regno < 0) 5566 return 0; 5567 5568 if (size <= spill_size && 5569 bpf_stack_narrow_access_ok(off, size, spill_size)) { 5570 /* The earlier check_reg_arg() has decided the 5571 * subreg_def for this insn. Save it first. 5572 */ 5573 s32 subreg_def = state->regs[dst_regno].subreg_def; 5574 5575 if (env->bpf_capable && size == 4 && spill_size == 4 && 5576 get_reg_width(reg) <= 32) 5577 /* Ensure stack slot has an ID to build a relation 5578 * with the destination register on fill. 5579 */ 5580 assign_scalar_id_before_mov(env, reg); 5581 copy_register_state(&state->regs[dst_regno], reg); 5582 state->regs[dst_regno].subreg_def = subreg_def; 5583 5584 /* Break the relation on a narrowing fill. 
5585 * coerce_reg_to_size will adjust the boundaries. 5586 */ 5587 if (get_reg_width(reg) > size * BITS_PER_BYTE) 5588 state->regs[dst_regno].id = 0; 5589 } else { 5590 int spill_cnt = 0, zero_cnt = 0; 5591 5592 for (i = 0; i < size; i++) { 5593 type = stype[(slot - i) % BPF_REG_SIZE]; 5594 if (type == STACK_SPILL) { 5595 spill_cnt++; 5596 continue; 5597 } 5598 if (type == STACK_MISC) 5599 continue; 5600 if (type == STACK_ZERO) { 5601 zero_cnt++; 5602 continue; 5603 } 5604 if (type == STACK_INVALID && env->allow_uninit_stack) 5605 continue; 5606 verbose(env, "invalid read from stack off %d+%d size %d\n", 5607 off, i, size); 5608 return -EACCES; 5609 } 5610 5611 if (spill_cnt == size && 5612 tnum_is_const(reg->var_off) && reg->var_off.value == 0) { 5613 __mark_reg_const_zero(env, &state->regs[dst_regno]); 5614 /* this IS register fill, so keep insn_flags */ 5615 } else if (zero_cnt == size) { 5616 /* similarly to mark_reg_stack_read(), preserve zeroes */ 5617 __mark_reg_const_zero(env, &state->regs[dst_regno]); 5618 insn_flags = 0; /* not restoring original register state */ 5619 } else { 5620 mark_reg_unknown(env, state->regs, dst_regno); 5621 insn_flags = 0; /* not restoring original register state */ 5622 } 5623 } 5624 } else if (dst_regno >= 0) { 5625 /* restore register state from stack */ 5626 if (env->bpf_capable) 5627 /* Ensure stack slot has an ID to build a relation 5628 * with the destination register on fill. 5629 */ 5630 assign_scalar_id_before_mov(env, reg); 5631 copy_register_state(&state->regs[dst_regno], reg); 5632 /* mark reg as written since spilled pointer state likely 5633 * has its liveness marks cleared by is_state_visited() 5634 * which resets stack/reg liveness for state transitions 5635 */ 5636 } else if (__is_pointer_value(env->allow_ptr_leaks, reg)) { 5637 /* If dst_regno==-1, the caller is asking us whether 5638 * it is acceptable to use this value as a SCALAR_VALUE 5639 * (e.g. for XADD). 
5640 * We must not allow unprivileged callers to do that 5641 * with spilled pointers. 5642 */ 5643 verbose(env, "leaking pointer from stack off %d\n", 5644 off); 5645 return -EACCES; 5646 } 5647 } else { 5648 for (i = 0; i < size; i++) { 5649 type = stype[(slot - i) % BPF_REG_SIZE]; 5650 if (type == STACK_MISC) 5651 continue; 5652 if (type == STACK_ZERO) 5653 continue; 5654 if (type == STACK_INVALID && env->allow_uninit_stack) 5655 continue; 5656 verbose(env, "invalid read from stack off %d+%d size %d\n", 5657 off, i, size); 5658 return -EACCES; 5659 } 5660 if (dst_regno >= 0) 5661 mark_reg_stack_read(env, reg_state, off, off + size, dst_regno); 5662 insn_flags = 0; /* we are not restoring spilled register */ 5663 } 5664 if (insn_flags) 5665 return push_jmp_history(env, env->cur_state, insn_flags, 0); 5666 return 0; 5667 } 5668 5669 enum bpf_access_src { 5670 ACCESS_DIRECT = 1, /* the access is performed by an instruction */ 5671 ACCESS_HELPER = 2, /* the access is performed by a helper */ 5672 }; 5673 5674 static int check_stack_range_initialized(struct bpf_verifier_env *env, 5675 int regno, int off, int access_size, 5676 bool zero_size_allowed, 5677 enum bpf_access_type type, 5678 struct bpf_call_arg_meta *meta); 5679 5680 static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno) 5681 { 5682 return cur_regs(env) + regno; 5683 } 5684 5685 /* Read the stack at 'ptr_regno + off' and put the result into the register 5686 * 'dst_regno'. 5687 * 'off' includes the pointer register's fixed offset(i.e. 'ptr_regno.off'), 5688 * but not its variable offset. 5689 * 'size' is assumed to be <= reg size and the access is assumed to be aligned. 5690 * 5691 * As opposed to check_stack_read_fixed_off, this function doesn't deal with 5692 * filling registers (i.e. reads of spilled register cannot be detected when 5693 * the offset is not fixed). We conservatively mark 'dst_regno' as containing 5694 * SCALAR_VALUE. 
That's why we assert that the 'ptr_regno' has a variable 5695 * offset; for a fixed offset check_stack_read_fixed_off should be used 5696 * instead. 5697 */ 5698 static int check_stack_read_var_off(struct bpf_verifier_env *env, 5699 int ptr_regno, int off, int size, int dst_regno) 5700 { 5701 /* The state of the source register. */ 5702 struct bpf_reg_state *reg = reg_state(env, ptr_regno); 5703 struct bpf_func_state *ptr_state = func(env, reg); 5704 int err; 5705 int min_off, max_off; 5706 5707 /* Note that we pass a NULL meta, so raw access will not be permitted. 5708 */ 5709 err = check_stack_range_initialized(env, ptr_regno, off, size, 5710 false, BPF_READ, NULL); 5711 if (err) 5712 return err; 5713 5714 min_off = reg->smin_value + off; 5715 max_off = reg->smax_value + off; 5716 mark_reg_stack_read(env, ptr_state, min_off, max_off + size, dst_regno); 5717 check_fastcall_stack_contract(env, ptr_state, env->insn_idx, min_off); 5718 return 0; 5719 } 5720 5721 /* check_stack_read dispatches to check_stack_read_fixed_off or 5722 * check_stack_read_var_off. 5723 * 5724 * The caller must ensure that the offset falls within the allocated stack 5725 * bounds. 5726 * 5727 * 'dst_regno' is a register which will receive the value from the stack. It 5728 * can be -1, meaning that the read value is not going to a register. 5729 */ 5730 static int check_stack_read(struct bpf_verifier_env *env, 5731 int ptr_regno, int off, int size, 5732 int dst_regno) 5733 { 5734 struct bpf_reg_state *reg = reg_state(env, ptr_regno); 5735 struct bpf_func_state *state = func(env, reg); 5736 int err; 5737 /* Some accesses are only permitted with a static offset. */ 5738 bool var_off = !tnum_is_const(reg->var_off); 5739 5740 /* The offset is required to be static when reads don't go to a 5741 * register, in order to not leak pointers (see 5742 * check_stack_read_fixed_off). 
5743 */ 5744 if (dst_regno < 0 && var_off) { 5745 char tn_buf[48]; 5746 5747 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); 5748 verbose(env, "variable offset stack pointer cannot be passed into helper function; var_off=%s off=%d size=%d\n", 5749 tn_buf, off, size); 5750 return -EACCES; 5751 } 5752 /* Variable offset is prohibited for unprivileged mode for simplicity 5753 * since it requires corresponding support in Spectre masking for stack 5754 * ALU. See also retrieve_ptr_limit(). The check in 5755 * check_stack_access_for_ptr_arithmetic() called by 5756 * adjust_ptr_min_max_vals() prevents users from creating stack pointers 5757 * with variable offsets, therefore no check is required here. Further, 5758 * just checking it here would be insufficient as speculative stack 5759 * writes could still lead to unsafe speculative behaviour. 5760 */ 5761 if (!var_off) { 5762 off += reg->var_off.value; 5763 err = check_stack_read_fixed_off(env, state, off, size, 5764 dst_regno); 5765 } else { 5766 /* Variable offset stack reads need more conservative handling 5767 * than fixed offset ones. Note that dst_regno >= 0 on this 5768 * branch. 5769 */ 5770 err = check_stack_read_var_off(env, ptr_regno, off, size, 5771 dst_regno); 5772 } 5773 return err; 5774 } 5775 5776 5777 /* check_stack_write dispatches to check_stack_write_fixed_off or 5778 * check_stack_write_var_off. 5779 * 5780 * 'ptr_regno' is the register used as a pointer into the stack. 5781 * 'off' includes 'ptr_regno->off', but not its variable offset (if any). 5782 * 'value_regno' is the register whose value we're writing to the stack. It can 5783 * be -1, meaning that we're not writing from a register. 5784 * 5785 * The caller must ensure that the offset falls within the maximum stack size. 
5786 */ 5787 static int check_stack_write(struct bpf_verifier_env *env, 5788 int ptr_regno, int off, int size, 5789 int value_regno, int insn_idx) 5790 { 5791 struct bpf_reg_state *reg = reg_state(env, ptr_regno); 5792 struct bpf_func_state *state = func(env, reg); 5793 int err; 5794 5795 if (tnum_is_const(reg->var_off)) { 5796 off += reg->var_off.value; 5797 err = check_stack_write_fixed_off(env, state, off, size, 5798 value_regno, insn_idx); 5799 } else { 5800 /* Variable offset stack reads need more conservative handling 5801 * than fixed offset ones. 5802 */ 5803 err = check_stack_write_var_off(env, state, 5804 ptr_regno, off, size, 5805 value_regno, insn_idx); 5806 } 5807 return err; 5808 } 5809 5810 static int check_map_access_type(struct bpf_verifier_env *env, u32 regno, 5811 int off, int size, enum bpf_access_type type) 5812 { 5813 struct bpf_reg_state *reg = reg_state(env, regno); 5814 struct bpf_map *map = reg->map_ptr; 5815 u32 cap = bpf_map_flags_to_cap(map); 5816 5817 if (type == BPF_WRITE && !(cap & BPF_MAP_CAN_WRITE)) { 5818 verbose(env, "write into map forbidden, value_size=%d off=%d size=%d\n", 5819 map->value_size, off, size); 5820 return -EACCES; 5821 } 5822 5823 if (type == BPF_READ && !(cap & BPF_MAP_CAN_READ)) { 5824 verbose(env, "read from map forbidden, value_size=%d off=%d size=%d\n", 5825 map->value_size, off, size); 5826 return -EACCES; 5827 } 5828 5829 return 0; 5830 } 5831 5832 /* check read/write into memory region (e.g., map value, ringbuf sample, etc) */ 5833 static int __check_mem_access(struct bpf_verifier_env *env, int regno, 5834 int off, int size, u32 mem_size, 5835 bool zero_size_allowed) 5836 { 5837 bool size_ok = size > 0 || (size == 0 && zero_size_allowed); 5838 struct bpf_reg_state *reg; 5839 5840 if (off >= 0 && size_ok && (u64)off + size <= mem_size) 5841 return 0; 5842 5843 reg = &cur_regs(env)[regno]; 5844 switch (reg->type) { 5845 case PTR_TO_MAP_KEY: 5846 verbose(env, "invalid access to map key, key_size=%d off=%d 
size=%d\n", 5847 mem_size, off, size); 5848 break; 5849 case PTR_TO_MAP_VALUE: 5850 verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n", 5851 mem_size, off, size); 5852 break; 5853 case PTR_TO_PACKET: 5854 case PTR_TO_PACKET_META: 5855 case PTR_TO_PACKET_END: 5856 verbose(env, "invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n", 5857 off, size, regno, reg->id, off, mem_size); 5858 break; 5859 case PTR_TO_MEM: 5860 default: 5861 verbose(env, "invalid access to memory, mem_size=%u off=%d size=%d\n", 5862 mem_size, off, size); 5863 } 5864 5865 return -EACCES; 5866 } 5867 5868 /* check read/write into a memory region with possible variable offset */ 5869 static int check_mem_region_access(struct bpf_verifier_env *env, u32 regno, 5870 int off, int size, u32 mem_size, 5871 bool zero_size_allowed) 5872 { 5873 struct bpf_verifier_state *vstate = env->cur_state; 5874 struct bpf_func_state *state = vstate->frame[vstate->curframe]; 5875 struct bpf_reg_state *reg = &state->regs[regno]; 5876 int err; 5877 5878 /* We may have adjusted the register pointing to memory region, so we 5879 * need to try adding each of min_value and max_value to off 5880 * to make sure our theoretical access will be safe. 5881 * 5882 * The minimum value is only important with signed 5883 * comparisons where we can't assume the floor of a 5884 * value is 0. If we are using signed variables for our 5885 * index'es we need to make sure that whatever we use 5886 * will have a set floor within our range. 
5887 */ 5888 if (reg->smin_value < 0 && 5889 (reg->smin_value == S64_MIN || 5890 (off + reg->smin_value != (s64)(s32)(off + reg->smin_value)) || 5891 reg->smin_value + off < 0)) { 5892 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n", 5893 regno); 5894 return -EACCES; 5895 } 5896 err = __check_mem_access(env, regno, reg->smin_value + off, size, 5897 mem_size, zero_size_allowed); 5898 if (err) { 5899 verbose(env, "R%d min value is outside of the allowed memory range\n", 5900 regno); 5901 return err; 5902 } 5903 5904 /* If we haven't set a max value then we need to bail since we can't be 5905 * sure we won't do bad things. 5906 * If reg->umax_value + off could overflow, treat that as unbounded too. 5907 */ 5908 if (reg->umax_value >= BPF_MAX_VAR_OFF) { 5909 verbose(env, "R%d unbounded memory access, make sure to bounds check any such access\n", 5910 regno); 5911 return -EACCES; 5912 } 5913 err = __check_mem_access(env, regno, reg->umax_value + off, size, 5914 mem_size, zero_size_allowed); 5915 if (err) { 5916 verbose(env, "R%d max value is outside of the allowed memory range\n", 5917 regno); 5918 return err; 5919 } 5920 5921 return 0; 5922 } 5923 5924 static int __check_ptr_off_reg(struct bpf_verifier_env *env, 5925 const struct bpf_reg_state *reg, int regno, 5926 bool fixed_off_ok) 5927 { 5928 /* Access to this pointer-typed register or passing it to a helper 5929 * is only allowed in its original, unmodified form. 
5930 */ 5931 5932 if (reg->off < 0) { 5933 verbose(env, "negative offset %s ptr R%d off=%d disallowed\n", 5934 reg_type_str(env, reg->type), regno, reg->off); 5935 return -EACCES; 5936 } 5937 5938 if (!fixed_off_ok && reg->off) { 5939 verbose(env, "dereference of modified %s ptr R%d off=%d disallowed\n", 5940 reg_type_str(env, reg->type), regno, reg->off); 5941 return -EACCES; 5942 } 5943 5944 if (!tnum_is_const(reg->var_off) || reg->var_off.value) { 5945 char tn_buf[48]; 5946 5947 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); 5948 verbose(env, "variable %s access var_off=%s disallowed\n", 5949 reg_type_str(env, reg->type), tn_buf); 5950 return -EACCES; 5951 } 5952 5953 return 0; 5954 } 5955 5956 static int check_ptr_off_reg(struct bpf_verifier_env *env, 5957 const struct bpf_reg_state *reg, int regno) 5958 { 5959 return __check_ptr_off_reg(env, reg, regno, false); 5960 } 5961 5962 static int map_kptr_match_type(struct bpf_verifier_env *env, 5963 struct btf_field *kptr_field, 5964 struct bpf_reg_state *reg, u32 regno) 5965 { 5966 const char *targ_name = btf_type_name(kptr_field->kptr.btf, kptr_field->kptr.btf_id); 5967 int perm_flags; 5968 const char *reg_name = ""; 5969 5970 if (btf_is_kernel(reg->btf)) { 5971 perm_flags = PTR_MAYBE_NULL | PTR_TRUSTED | MEM_RCU; 5972 5973 /* Only unreferenced case accepts untrusted pointers */ 5974 if (kptr_field->type == BPF_KPTR_UNREF) 5975 perm_flags |= PTR_UNTRUSTED; 5976 } else { 5977 perm_flags = PTR_MAYBE_NULL | MEM_ALLOC; 5978 if (kptr_field->type == BPF_KPTR_PERCPU) 5979 perm_flags |= MEM_PERCPU; 5980 } 5981 5982 if (base_type(reg->type) != PTR_TO_BTF_ID || (type_flag(reg->type) & ~perm_flags)) 5983 goto bad_type; 5984 5985 /* We need to verify reg->type and reg->btf, before accessing reg->btf */ 5986 reg_name = btf_type_name(reg->btf, reg->btf_id); 5987 5988 /* For ref_ptr case, release function check should ensure we get one 5989 * referenced PTR_TO_BTF_ID, and that its fixed offset is 0. 
For the 5990 * normal store of unreferenced kptr, we must ensure var_off is zero. 5991 * Since ref_ptr cannot be accessed directly by BPF insns, checks for 5992 * reg->off and reg->ref_obj_id are not needed here. 5993 */ 5994 if (__check_ptr_off_reg(env, reg, regno, true)) 5995 return -EACCES; 5996 5997 /* A full type match is needed, as BTF can be vmlinux, module or prog BTF, and 5998 * we also need to take into account the reg->off. 5999 * 6000 * We want to support cases like: 6001 * 6002 * struct foo { 6003 * struct bar br; 6004 * struct baz bz; 6005 * }; 6006 * 6007 * struct foo *v; 6008 * v = func(); // PTR_TO_BTF_ID 6009 * val->foo = v; // reg->off is zero, btf and btf_id match type 6010 * val->bar = &v->br; // reg->off is still zero, but we need to retry with 6011 * // first member type of struct after comparison fails 6012 * val->baz = &v->bz; // reg->off is non-zero, so struct needs to be walked 6013 * // to match type 6014 * 6015 * In the kptr_ref case, check_func_arg_reg_off already ensures reg->off 6016 * is zero. We must also ensure that btf_struct_ids_match does not walk 6017 * the struct to match type against first member of struct, i.e. reject 6018 * second case from above. Hence, when type is BPF_KPTR_REF, we set 6019 * strict mode to true for type match. 
6020 */ 6021 if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->off, 6022 kptr_field->kptr.btf, kptr_field->kptr.btf_id, 6023 kptr_field->type != BPF_KPTR_UNREF)) 6024 goto bad_type; 6025 return 0; 6026 bad_type: 6027 verbose(env, "invalid kptr access, R%d type=%s%s ", regno, 6028 reg_type_str(env, reg->type), reg_name); 6029 verbose(env, "expected=%s%s", reg_type_str(env, PTR_TO_BTF_ID), targ_name); 6030 if (kptr_field->type == BPF_KPTR_UNREF) 6031 verbose(env, " or %s%s\n", reg_type_str(env, PTR_TO_BTF_ID | PTR_UNTRUSTED), 6032 targ_name); 6033 else 6034 verbose(env, "\n"); 6035 return -EINVAL; 6036 } 6037 6038 static bool in_sleepable(struct bpf_verifier_env *env) 6039 { 6040 return env->cur_state->in_sleepable; 6041 } 6042 6043 /* The non-sleepable programs and sleepable programs with explicit bpf_rcu_read_lock() 6044 * can dereference RCU protected pointers and result is PTR_TRUSTED. 6045 */ 6046 static bool in_rcu_cs(struct bpf_verifier_env *env) 6047 { 6048 return env->cur_state->active_rcu_locks || 6049 env->cur_state->active_locks || 6050 !in_sleepable(env); 6051 } 6052 6053 /* Once GCC supports btf_type_tag the following mechanism will be replaced with tag check */ 6054 BTF_SET_START(rcu_protected_types) 6055 #ifdef CONFIG_NET 6056 BTF_ID(struct, prog_test_ref_kfunc) 6057 #endif 6058 #ifdef CONFIG_CGROUPS 6059 BTF_ID(struct, cgroup) 6060 #endif 6061 #ifdef CONFIG_BPF_JIT 6062 BTF_ID(struct, bpf_cpumask) 6063 #endif 6064 BTF_ID(struct, task_struct) 6065 #ifdef CONFIG_CRYPTO 6066 BTF_ID(struct, bpf_crypto_ctx) 6067 #endif 6068 BTF_SET_END(rcu_protected_types) 6069 6070 static bool rcu_protected_object(const struct btf *btf, u32 btf_id) 6071 { 6072 if (!btf_is_kernel(btf)) 6073 return true; 6074 return btf_id_set_contains(&rcu_protected_types, btf_id); 6075 } 6076 6077 static struct btf_record *kptr_pointee_btf_record(struct btf_field *kptr_field) 6078 { 6079 struct btf_struct_meta *meta; 6080 6081 if (btf_is_kernel(kptr_field->kptr.btf)) 6082 
return NULL; 6083 6084 meta = btf_find_struct_meta(kptr_field->kptr.btf, 6085 kptr_field->kptr.btf_id); 6086 6087 return meta ? meta->record : NULL; 6088 } 6089 6090 static bool rcu_safe_kptr(const struct btf_field *field) 6091 { 6092 const struct btf_field_kptr *kptr = &field->kptr; 6093 6094 return field->type == BPF_KPTR_PERCPU || 6095 (field->type == BPF_KPTR_REF && rcu_protected_object(kptr->btf, kptr->btf_id)); 6096 } 6097 6098 static u32 btf_ld_kptr_type(struct bpf_verifier_env *env, struct btf_field *kptr_field) 6099 { 6100 struct btf_record *rec; 6101 u32 ret; 6102 6103 ret = PTR_MAYBE_NULL; 6104 if (rcu_safe_kptr(kptr_field) && in_rcu_cs(env)) { 6105 ret |= MEM_RCU; 6106 if (kptr_field->type == BPF_KPTR_PERCPU) 6107 ret |= MEM_PERCPU; 6108 else if (!btf_is_kernel(kptr_field->kptr.btf)) 6109 ret |= MEM_ALLOC; 6110 6111 rec = kptr_pointee_btf_record(kptr_field); 6112 if (rec && btf_record_has_field(rec, BPF_GRAPH_NODE)) 6113 ret |= NON_OWN_REF; 6114 } else { 6115 ret |= PTR_UNTRUSTED; 6116 } 6117 6118 return ret; 6119 } 6120 6121 static int mark_uptr_ld_reg(struct bpf_verifier_env *env, u32 regno, 6122 struct btf_field *field) 6123 { 6124 struct bpf_reg_state *reg; 6125 const struct btf_type *t; 6126 6127 t = btf_type_by_id(field->kptr.btf, field->kptr.btf_id); 6128 mark_reg_known_zero(env, cur_regs(env), regno); 6129 reg = reg_state(env, regno); 6130 reg->type = PTR_TO_MEM | PTR_MAYBE_NULL; 6131 reg->mem_size = t->size; 6132 reg->id = ++env->id_gen; 6133 6134 return 0; 6135 } 6136 6137 static int check_map_kptr_access(struct bpf_verifier_env *env, u32 regno, 6138 int value_regno, int insn_idx, 6139 struct btf_field *kptr_field) 6140 { 6141 struct bpf_insn *insn = &env->prog->insnsi[insn_idx]; 6142 int class = BPF_CLASS(insn->code); 6143 struct bpf_reg_state *val_reg; 6144 int ret; 6145 6146 /* Things we already checked for in check_map_access and caller: 6147 * - Reject cases where variable offset may touch kptr 6148 * - size of access (must be BPF_DW) 
6149 * - tnum_is_const(reg->var_off) 6150 * - kptr_field->offset == off + reg->var_off.value 6151 */ 6152 /* Only BPF_[LDX,STX,ST] | BPF_MEM | BPF_DW is supported */ 6153 if (BPF_MODE(insn->code) != BPF_MEM) { 6154 verbose(env, "kptr in map can only be accessed using BPF_MEM instruction mode\n"); 6155 return -EACCES; 6156 } 6157 6158 /* We only allow loading referenced kptr, since it will be marked as 6159 * untrusted, similar to unreferenced kptr. 6160 */ 6161 if (class != BPF_LDX && 6162 (kptr_field->type == BPF_KPTR_REF || kptr_field->type == BPF_KPTR_PERCPU)) { 6163 verbose(env, "store to referenced kptr disallowed\n"); 6164 return -EACCES; 6165 } 6166 if (class != BPF_LDX && kptr_field->type == BPF_UPTR) { 6167 verbose(env, "store to uptr disallowed\n"); 6168 return -EACCES; 6169 } 6170 6171 if (class == BPF_LDX) { 6172 if (kptr_field->type == BPF_UPTR) 6173 return mark_uptr_ld_reg(env, value_regno, kptr_field); 6174 6175 /* We can simply mark the value_regno receiving the pointer 6176 * value from map as PTR_TO_BTF_ID, with the correct type. 6177 */ 6178 ret = mark_btf_ld_reg(env, cur_regs(env), value_regno, PTR_TO_BTF_ID, 6179 kptr_field->kptr.btf, kptr_field->kptr.btf_id, 6180 btf_ld_kptr_type(env, kptr_field)); 6181 if (ret < 0) 6182 return ret; 6183 } else if (class == BPF_STX) { 6184 val_reg = reg_state(env, value_regno); 6185 if (!register_is_null(val_reg) && 6186 map_kptr_match_type(env, kptr_field, val_reg, value_regno)) 6187 return -EACCES; 6188 } else if (class == BPF_ST) { 6189 if (insn->imm) { 6190 verbose(env, "BPF_ST imm must be 0 when storing to kptr at off=%u\n", 6191 kptr_field->offset); 6192 return -EACCES; 6193 } 6194 } else { 6195 verbose(env, "kptr in map can only be accessed using BPF_LDX/BPF_STX/BPF_ST\n"); 6196 return -EACCES; 6197 } 6198 return 0; 6199 } 6200 6201 /* 6202 * Return the size of the memory region accessible from a pointer to map value. 6203 * For INSN_ARRAY maps whole bpf_insn_array->ips array is accessible. 
6204 */ 6205 static u32 map_mem_size(const struct bpf_map *map) 6206 { 6207 if (map->map_type == BPF_MAP_TYPE_INSN_ARRAY) 6208 return map->max_entries * sizeof(long); 6209 6210 return map->value_size; 6211 } 6212 6213 /* check read/write into a map element with possible variable offset */ 6214 static int check_map_access(struct bpf_verifier_env *env, u32 regno, 6215 int off, int size, bool zero_size_allowed, 6216 enum bpf_access_src src) 6217 { 6218 struct bpf_verifier_state *vstate = env->cur_state; 6219 struct bpf_func_state *state = vstate->frame[vstate->curframe]; 6220 struct bpf_reg_state *reg = &state->regs[regno]; 6221 struct bpf_map *map = reg->map_ptr; 6222 u32 mem_size = map_mem_size(map); 6223 struct btf_record *rec; 6224 int err, i; 6225 6226 err = check_mem_region_access(env, regno, off, size, mem_size, zero_size_allowed); 6227 if (err) 6228 return err; 6229 6230 if (IS_ERR_OR_NULL(map->record)) 6231 return 0; 6232 rec = map->record; 6233 for (i = 0; i < rec->cnt; i++) { 6234 struct btf_field *field = &rec->fields[i]; 6235 u32 p = field->offset; 6236 6237 /* If any part of a field can be touched by load/store, reject 6238 * this program. To check that [x1, x2) overlaps with [y1, y2), 6239 * it is sufficient to check x1 < y2 && y1 < x2. 
6240 */ 6241 if (reg->smin_value + off < p + field->size && 6242 p < reg->umax_value + off + size) { 6243 switch (field->type) { 6244 case BPF_KPTR_UNREF: 6245 case BPF_KPTR_REF: 6246 case BPF_KPTR_PERCPU: 6247 case BPF_UPTR: 6248 if (src != ACCESS_DIRECT) { 6249 verbose(env, "%s cannot be accessed indirectly by helper\n", 6250 btf_field_type_name(field->type)); 6251 return -EACCES; 6252 } 6253 if (!tnum_is_const(reg->var_off)) { 6254 verbose(env, "%s access cannot have variable offset\n", 6255 btf_field_type_name(field->type)); 6256 return -EACCES; 6257 } 6258 if (p != off + reg->var_off.value) { 6259 verbose(env, "%s access misaligned expected=%u off=%llu\n", 6260 btf_field_type_name(field->type), 6261 p, off + reg->var_off.value); 6262 return -EACCES; 6263 } 6264 if (size != bpf_size_to_bytes(BPF_DW)) { 6265 verbose(env, "%s access size must be BPF_DW\n", 6266 btf_field_type_name(field->type)); 6267 return -EACCES; 6268 } 6269 break; 6270 default: 6271 verbose(env, "%s cannot be accessed directly by load/store\n", 6272 btf_field_type_name(field->type)); 6273 return -EACCES; 6274 } 6275 } 6276 } 6277 return 0; 6278 } 6279 6280 #define MAX_PACKET_OFF 0xffff 6281 6282 static bool may_access_direct_pkt_data(struct bpf_verifier_env *env, 6283 const struct bpf_call_arg_meta *meta, 6284 enum bpf_access_type t) 6285 { 6286 enum bpf_prog_type prog_type = resolve_prog_type(env->prog); 6287 6288 switch (prog_type) { 6289 /* Program types only with direct read access go here! */ 6290 case BPF_PROG_TYPE_LWT_IN: 6291 case BPF_PROG_TYPE_LWT_OUT: 6292 case BPF_PROG_TYPE_LWT_SEG6LOCAL: 6293 case BPF_PROG_TYPE_SK_REUSEPORT: 6294 case BPF_PROG_TYPE_FLOW_DISSECTOR: 6295 case BPF_PROG_TYPE_CGROUP_SKB: 6296 if (t == BPF_WRITE) 6297 return false; 6298 fallthrough; 6299 6300 /* Program types with direct read + write access go here! 
*/ 6301 case BPF_PROG_TYPE_SCHED_CLS: 6302 case BPF_PROG_TYPE_SCHED_ACT: 6303 case BPF_PROG_TYPE_XDP: 6304 case BPF_PROG_TYPE_LWT_XMIT: 6305 case BPF_PROG_TYPE_SK_SKB: 6306 case BPF_PROG_TYPE_SK_MSG: 6307 if (meta) 6308 return meta->pkt_access; 6309 6310 env->seen_direct_write = true; 6311 return true; 6312 6313 case BPF_PROG_TYPE_CGROUP_SOCKOPT: 6314 if (t == BPF_WRITE) 6315 env->seen_direct_write = true; 6316 6317 return true; 6318 6319 default: 6320 return false; 6321 } 6322 } 6323 6324 static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off, 6325 int size, bool zero_size_allowed) 6326 { 6327 struct bpf_reg_state *reg = reg_state(env, regno); 6328 int err; 6329 6330 /* We may have added a variable offset to the packet pointer; but any 6331 * reg->range we have comes after that. We are only checking the fixed 6332 * offset. 6333 */ 6334 6335 /* We don't allow negative numbers, because we aren't tracking enough 6336 * detail to prove they're safe. 6337 */ 6338 if (reg->smin_value < 0) { 6339 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n", 6340 regno); 6341 return -EACCES; 6342 } 6343 6344 err = reg->range < 0 ? -EINVAL : 6345 __check_mem_access(env, regno, off, size, reg->range, 6346 zero_size_allowed); 6347 if (err) { 6348 verbose(env, "R%d offset is outside of the packet\n", regno); 6349 return err; 6350 } 6351 6352 /* __check_mem_access has made sure "off + size - 1" is within u16. 6353 * reg->umax_value can't be bigger than MAX_PACKET_OFF which is 0xffff, 6354 * otherwise find_good_pkt_pointers would have refused to set range info 6355 * that __check_mem_access would have rejected this pkt access. 6356 * Therefore, "off + reg->umax_value + size - 1" won't overflow u32. 
6357 */ 6358 env->prog->aux->max_pkt_offset = 6359 max_t(u32, env->prog->aux->max_pkt_offset, 6360 off + reg->umax_value + size - 1); 6361 6362 return err; 6363 } 6364 6365 /* check access to 'struct bpf_context' fields. Supports fixed offsets only */ 6366 static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size, 6367 enum bpf_access_type t, struct bpf_insn_access_aux *info) 6368 { 6369 if (env->ops->is_valid_access && 6370 env->ops->is_valid_access(off, size, t, env->prog, info)) { 6371 /* A non zero info.ctx_field_size indicates that this field is a 6372 * candidate for later verifier transformation to load the whole 6373 * field and then apply a mask when accessed with a narrower 6374 * access than actual ctx access size. A zero info.ctx_field_size 6375 * will only allow for whole field access and rejects any other 6376 * type of narrower access. 6377 */ 6378 if (base_type(info->reg_type) == PTR_TO_BTF_ID) { 6379 if (info->ref_obj_id && 6380 !find_reference_state(env->cur_state, info->ref_obj_id)) { 6381 verbose(env, "invalid bpf_context access off=%d. 
Reference may already be released\n", 6382 off); 6383 return -EACCES; 6384 } 6385 } else { 6386 env->insn_aux_data[insn_idx].ctx_field_size = info->ctx_field_size; 6387 } 6388 /* remember the offset of last byte accessed in ctx */ 6389 if (env->prog->aux->max_ctx_offset < off + size) 6390 env->prog->aux->max_ctx_offset = off + size; 6391 return 0; 6392 } 6393 6394 verbose(env, "invalid bpf_context access off=%d size=%d\n", off, size); 6395 return -EACCES; 6396 } 6397 6398 static int check_flow_keys_access(struct bpf_verifier_env *env, int off, 6399 int size) 6400 { 6401 if (size < 0 || off < 0 || 6402 (u64)off + size > sizeof(struct bpf_flow_keys)) { 6403 verbose(env, "invalid access to flow keys off=%d size=%d\n", 6404 off, size); 6405 return -EACCES; 6406 } 6407 return 0; 6408 } 6409 6410 static int check_sock_access(struct bpf_verifier_env *env, int insn_idx, 6411 u32 regno, int off, int size, 6412 enum bpf_access_type t) 6413 { 6414 struct bpf_reg_state *reg = reg_state(env, regno); 6415 struct bpf_insn_access_aux info = {}; 6416 bool valid; 6417 6418 if (reg->smin_value < 0) { 6419 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n", 6420 regno); 6421 return -EACCES; 6422 } 6423 6424 switch (reg->type) { 6425 case PTR_TO_SOCK_COMMON: 6426 valid = bpf_sock_common_is_valid_access(off, size, t, &info); 6427 break; 6428 case PTR_TO_SOCKET: 6429 valid = bpf_sock_is_valid_access(off, size, t, &info); 6430 break; 6431 case PTR_TO_TCP_SOCK: 6432 valid = bpf_tcp_sock_is_valid_access(off, size, t, &info); 6433 break; 6434 case PTR_TO_XDP_SOCK: 6435 valid = bpf_xdp_sock_is_valid_access(off, size, t, &info); 6436 break; 6437 default: 6438 valid = false; 6439 } 6440 6441 6442 if (valid) { 6443 env->insn_aux_data[insn_idx].ctx_field_size = 6444 info.ctx_field_size; 6445 return 0; 6446 } 6447 6448 verbose(env, "R%d invalid %s access off=%d size=%d\n", 6449 regno, reg_type_str(env, reg->type), off, size); 6450 6451 return 
-EACCES; 6452 } 6453 6454 static bool is_pointer_value(struct bpf_verifier_env *env, int regno) 6455 { 6456 return __is_pointer_value(env->allow_ptr_leaks, reg_state(env, regno)); 6457 } 6458 6459 static bool is_ctx_reg(struct bpf_verifier_env *env, int regno) 6460 { 6461 const struct bpf_reg_state *reg = reg_state(env, regno); 6462 6463 return reg->type == PTR_TO_CTX; 6464 } 6465 6466 static bool is_sk_reg(struct bpf_verifier_env *env, int regno) 6467 { 6468 const struct bpf_reg_state *reg = reg_state(env, regno); 6469 6470 return type_is_sk_pointer(reg->type); 6471 } 6472 6473 static bool is_pkt_reg(struct bpf_verifier_env *env, int regno) 6474 { 6475 const struct bpf_reg_state *reg = reg_state(env, regno); 6476 6477 return type_is_pkt_pointer(reg->type); 6478 } 6479 6480 static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno) 6481 { 6482 const struct bpf_reg_state *reg = reg_state(env, regno); 6483 6484 /* Separate to is_ctx_reg() since we still want to allow BPF_ST here. */ 6485 return reg->type == PTR_TO_FLOW_KEYS; 6486 } 6487 6488 static bool is_arena_reg(struct bpf_verifier_env *env, int regno) 6489 { 6490 const struct bpf_reg_state *reg = reg_state(env, regno); 6491 6492 return reg->type == PTR_TO_ARENA; 6493 } 6494 6495 /* Return false if @regno contains a pointer whose type isn't supported for 6496 * atomic instruction @insn. 
6497 */ 6498 static bool atomic_ptr_type_ok(struct bpf_verifier_env *env, int regno, 6499 struct bpf_insn *insn) 6500 { 6501 if (is_ctx_reg(env, regno)) 6502 return false; 6503 if (is_pkt_reg(env, regno)) 6504 return false; 6505 if (is_flow_key_reg(env, regno)) 6506 return false; 6507 if (is_sk_reg(env, regno)) 6508 return false; 6509 if (is_arena_reg(env, regno)) 6510 return bpf_jit_supports_insn(insn, true); 6511 6512 return true; 6513 } 6514 6515 static u32 *reg2btf_ids[__BPF_REG_TYPE_MAX] = { 6516 #ifdef CONFIG_NET 6517 [PTR_TO_SOCKET] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK], 6518 [PTR_TO_SOCK_COMMON] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON], 6519 [PTR_TO_TCP_SOCK] = &btf_sock_ids[BTF_SOCK_TYPE_TCP], 6520 #endif 6521 [CONST_PTR_TO_MAP] = btf_bpf_map_id, 6522 }; 6523 6524 static bool is_trusted_reg(const struct bpf_reg_state *reg) 6525 { 6526 /* A referenced register is always trusted. */ 6527 if (reg->ref_obj_id) 6528 return true; 6529 6530 /* Types listed in the reg2btf_ids are always trusted */ 6531 if (reg2btf_ids[base_type(reg->type)] && 6532 !bpf_type_has_unsafe_modifiers(reg->type)) 6533 return true; 6534 6535 /* If a register is not referenced, it is trusted if it has the 6536 * MEM_ALLOC or PTR_TRUSTED type modifiers, and no others. Some of the 6537 * other type modifiers may be safe, but we elect to take an opt-in 6538 * approach here as some (e.g. PTR_UNTRUSTED and PTR_MAYBE_NULL) are 6539 * not. 6540 * 6541 * Eventually, we should make PTR_TRUSTED the single source of truth 6542 * for whether a register is trusted. 
6543 */ 6544 return type_flag(reg->type) & BPF_REG_TRUSTED_MODIFIERS && 6545 !bpf_type_has_unsafe_modifiers(reg->type); 6546 } 6547 6548 static bool is_rcu_reg(const struct bpf_reg_state *reg) 6549 { 6550 return reg->type & MEM_RCU; 6551 } 6552 6553 static void clear_trusted_flags(enum bpf_type_flag *flag) 6554 { 6555 *flag &= ~(BPF_REG_TRUSTED_MODIFIERS | MEM_RCU); 6556 } 6557 6558 static int check_pkt_ptr_alignment(struct bpf_verifier_env *env, 6559 const struct bpf_reg_state *reg, 6560 int off, int size, bool strict) 6561 { 6562 struct tnum reg_off; 6563 int ip_align; 6564 6565 /* Byte size accesses are always allowed. */ 6566 if (!strict || size == 1) 6567 return 0; 6568 6569 /* For platforms that do not have a Kconfig enabling 6570 * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the value of 6571 * NET_IP_ALIGN is universally set to '2'. And on platforms 6572 * that do set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, we get 6573 * to this code only in strict mode where we want to emulate 6574 * the NET_IP_ALIGN==2 checking. Therefore use an 6575 * unconditional IP align value of '2'. 6576 */ 6577 ip_align = 2; 6578 6579 reg_off = tnum_add(reg->var_off, tnum_const(ip_align + reg->off + off)); 6580 if (!tnum_is_aligned(reg_off, size)) { 6581 char tn_buf[48]; 6582 6583 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); 6584 verbose(env, 6585 "misaligned packet access off %d+%s+%d+%d size %d\n", 6586 ip_align, tn_buf, reg->off, off, size); 6587 return -EACCES; 6588 } 6589 6590 return 0; 6591 } 6592 6593 static int check_generic_ptr_alignment(struct bpf_verifier_env *env, 6594 const struct bpf_reg_state *reg, 6595 const char *pointer_desc, 6596 int off, int size, bool strict) 6597 { 6598 struct tnum reg_off; 6599 6600 /* Byte size accesses are always allowed. 
*/ 6601 if (!strict || size == 1) 6602 return 0; 6603 6604 reg_off = tnum_add(reg->var_off, tnum_const(reg->off + off)); 6605 if (!tnum_is_aligned(reg_off, size)) { 6606 char tn_buf[48]; 6607 6608 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); 6609 verbose(env, "misaligned %saccess off %s+%d+%d size %d\n", 6610 pointer_desc, tn_buf, reg->off, off, size); 6611 return -EACCES; 6612 } 6613 6614 return 0; 6615 } 6616 6617 static int check_ptr_alignment(struct bpf_verifier_env *env, 6618 const struct bpf_reg_state *reg, int off, 6619 int size, bool strict_alignment_once) 6620 { 6621 bool strict = env->strict_alignment || strict_alignment_once; 6622 const char *pointer_desc = ""; 6623 6624 switch (reg->type) { 6625 case PTR_TO_PACKET: 6626 case PTR_TO_PACKET_META: 6627 /* Special case, because of NET_IP_ALIGN. Given metadata sits 6628 * right in front, treat it the very same way. 6629 */ 6630 return check_pkt_ptr_alignment(env, reg, off, size, strict); 6631 case PTR_TO_FLOW_KEYS: 6632 pointer_desc = "flow keys "; 6633 break; 6634 case PTR_TO_MAP_KEY: 6635 pointer_desc = "key "; 6636 break; 6637 case PTR_TO_MAP_VALUE: 6638 pointer_desc = "value "; 6639 if (reg->map_ptr->map_type == BPF_MAP_TYPE_INSN_ARRAY) 6640 strict = true; 6641 break; 6642 case PTR_TO_CTX: 6643 pointer_desc = "context "; 6644 break; 6645 case PTR_TO_STACK: 6646 pointer_desc = "stack "; 6647 /* The stack spill tracking logic in check_stack_write_fixed_off() 6648 * and check_stack_read_fixed_off() relies on stack accesses being 6649 * aligned. 
6650 */ 6651 strict = true; 6652 break; 6653 case PTR_TO_SOCKET: 6654 pointer_desc = "sock "; 6655 break; 6656 case PTR_TO_SOCK_COMMON: 6657 pointer_desc = "sock_common "; 6658 break; 6659 case PTR_TO_TCP_SOCK: 6660 pointer_desc = "tcp_sock "; 6661 break; 6662 case PTR_TO_XDP_SOCK: 6663 pointer_desc = "xdp_sock "; 6664 break; 6665 case PTR_TO_ARENA: 6666 return 0; 6667 default: 6668 break; 6669 } 6670 return check_generic_ptr_alignment(env, reg, pointer_desc, off, size, 6671 strict); 6672 } 6673 6674 static enum priv_stack_mode bpf_enable_priv_stack(struct bpf_prog *prog) 6675 { 6676 if (!bpf_jit_supports_private_stack()) 6677 return NO_PRIV_STACK; 6678 6679 /* bpf_prog_check_recur() checks all prog types that use bpf trampoline 6680 * while kprobe/tp/perf_event/raw_tp don't use trampoline hence checked 6681 * explicitly. 6682 */ 6683 switch (prog->type) { 6684 case BPF_PROG_TYPE_KPROBE: 6685 case BPF_PROG_TYPE_TRACEPOINT: 6686 case BPF_PROG_TYPE_PERF_EVENT: 6687 case BPF_PROG_TYPE_RAW_TRACEPOINT: 6688 return PRIV_STACK_ADAPTIVE; 6689 case BPF_PROG_TYPE_TRACING: 6690 case BPF_PROG_TYPE_LSM: 6691 case BPF_PROG_TYPE_STRUCT_OPS: 6692 if (prog->aux->priv_stack_requested || bpf_prog_check_recur(prog)) 6693 return PRIV_STACK_ADAPTIVE; 6694 fallthrough; 6695 default: 6696 break; 6697 } 6698 6699 return NO_PRIV_STACK; 6700 } 6701 6702 static int round_up_stack_depth(struct bpf_verifier_env *env, int stack_depth) 6703 { 6704 if (env->prog->jit_requested) 6705 return round_up(stack_depth, 16); 6706 6707 /* round up to 32-bytes, since this is granularity 6708 * of interpreter stack size 6709 */ 6710 return round_up(max_t(u32, stack_depth, 1), 32); 6711 } 6712 6713 /* starting from main bpf function walk all instructions of the function 6714 * and recursively walk all callees that given function can call. 6715 * Ignore jump and exit insns. 
6716 * Since recursion is prevented by check_cfg() this algorithm 6717 * only needs a local stack of MAX_CALL_FRAMES to remember callsites 6718 */ 6719 static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx, 6720 bool priv_stack_supported) 6721 { 6722 struct bpf_subprog_info *subprog = env->subprog_info; 6723 struct bpf_insn *insn = env->prog->insnsi; 6724 int depth = 0, frame = 0, i, subprog_end, subprog_depth; 6725 bool tail_call_reachable = false; 6726 int ret_insn[MAX_CALL_FRAMES]; 6727 int ret_prog[MAX_CALL_FRAMES]; 6728 int j; 6729 6730 i = subprog[idx].start; 6731 if (!priv_stack_supported) 6732 subprog[idx].priv_stack_mode = NO_PRIV_STACK; 6733 process_func: 6734 /* protect against potential stack overflow that might happen when 6735 * bpf2bpf calls get combined with tailcalls. Limit the caller's stack 6736 * depth for such case down to 256 so that the worst case scenario 6737 * would result in 8k stack size (32 which is tailcall limit * 256 = 6738 * 8k). 6739 * 6740 * To get the idea what might happen, see an example: 6741 * func1 -> sub rsp, 128 6742 * subfunc1 -> sub rsp, 256 6743 * tailcall1 -> add rsp, 256 6744 * func2 -> sub rsp, 192 (total stack size = 128 + 192 = 320) 6745 * subfunc2 -> sub rsp, 64 6746 * subfunc22 -> sub rsp, 128 6747 * tailcall2 -> add rsp, 128 6748 * func3 -> sub rsp, 32 (total stack size 128 + 192 + 64 + 32 = 416) 6749 * 6750 * tailcall will unwind the current stack frame but it will not get rid 6751 * of caller's stack as shown on the example above. 6752 */ 6753 if (idx && subprog[idx].has_tail_call && depth >= 256) { 6754 verbose(env, 6755 "tail_calls are not allowed when call stack of previous frames is %d bytes. Too large\n", 6756 depth); 6757 return -EACCES; 6758 } 6759 6760 subprog_depth = round_up_stack_depth(env, subprog[idx].stack_depth); 6761 if (priv_stack_supported) { 6762 /* Request private stack support only if the subprog stack 6763 * depth is no less than BPF_PRIV_STACK_MIN_SIZE. 
This is to 6764 * avoid jit penalty if the stack usage is small. 6765 */ 6766 if (subprog[idx].priv_stack_mode == PRIV_STACK_UNKNOWN && 6767 subprog_depth >= BPF_PRIV_STACK_MIN_SIZE) 6768 subprog[idx].priv_stack_mode = PRIV_STACK_ADAPTIVE; 6769 } 6770 6771 if (subprog[idx].priv_stack_mode == PRIV_STACK_ADAPTIVE) { 6772 if (subprog_depth > MAX_BPF_STACK) { 6773 verbose(env, "stack size of subprog %d is %d. Too large\n", 6774 idx, subprog_depth); 6775 return -EACCES; 6776 } 6777 } else { 6778 depth += subprog_depth; 6779 if (depth > MAX_BPF_STACK) { 6780 verbose(env, "combined stack size of %d calls is %d. Too large\n", 6781 frame + 1, depth); 6782 return -EACCES; 6783 } 6784 } 6785 continue_func: 6786 subprog_end = subprog[idx + 1].start; 6787 for (; i < subprog_end; i++) { 6788 int next_insn, sidx; 6789 6790 if (bpf_pseudo_kfunc_call(insn + i) && !insn[i].off) { 6791 bool err = false; 6792 6793 if (!is_bpf_throw_kfunc(insn + i)) 6794 continue; 6795 if (subprog[idx].is_cb) 6796 err = true; 6797 for (int c = 0; c < frame && !err; c++) { 6798 if (subprog[ret_prog[c]].is_cb) { 6799 err = true; 6800 break; 6801 } 6802 } 6803 if (!err) 6804 continue; 6805 verbose(env, 6806 "bpf_throw kfunc (insn %d) cannot be called from callback subprog %d\n", 6807 i, idx); 6808 return -EINVAL; 6809 } 6810 6811 if (!bpf_pseudo_call(insn + i) && !bpf_pseudo_func(insn + i)) 6812 continue; 6813 /* remember insn and function to return to */ 6814 ret_insn[frame] = i + 1; 6815 ret_prog[frame] = idx; 6816 6817 /* find the callee */ 6818 next_insn = i + insn[i].imm + 1; 6819 sidx = find_subprog(env, next_insn); 6820 if (verifier_bug_if(sidx < 0, env, "callee not found at insn %d", next_insn)) 6821 return -EFAULT; 6822 if (subprog[sidx].is_async_cb) { 6823 if (subprog[sidx].has_tail_call) { 6824 verifier_bug(env, "subprog has tail_call and async cb"); 6825 return -EFAULT; 6826 } 6827 /* async callbacks don't increase bpf prog stack size unless called directly */ 6828 if (!bpf_pseudo_call(insn + 
i)) 6829 continue; 6830 if (subprog[sidx].is_exception_cb) { 6831 verbose(env, "insn %d cannot call exception cb directly", i); 6832 return -EINVAL; 6833 } 6834 } 6835 i = next_insn; 6836 idx = sidx; 6837 if (!priv_stack_supported) 6838 subprog[idx].priv_stack_mode = NO_PRIV_STACK; 6839 6840 if (subprog[idx].has_tail_call) 6841 tail_call_reachable = true; 6842 6843 frame++; 6844 if (frame >= MAX_CALL_FRAMES) { 6845 verbose(env, "the call stack of %d frames is too deep !\n", 6846 frame); 6847 return -E2BIG; 6848 } 6849 goto process_func; 6850 } 6851 /* if tail call got detected across bpf2bpf calls then mark each of the 6852 * currently present subprog frames as tail call reachable subprogs; 6853 * this info will be utilized by JIT so that we will be preserving the 6854 * tail call counter throughout bpf2bpf calls combined with tailcalls 6855 */ 6856 if (tail_call_reachable) 6857 for (j = 0; j < frame; j++) { 6858 if (subprog[ret_prog[j]].is_exception_cb) { 6859 verbose(env, "cannot tail call within exception cb\n"); 6860 return -EINVAL; 6861 } 6862 subprog[ret_prog[j]].tail_call_reachable = true; 6863 } 6864 if (subprog[0].tail_call_reachable) 6865 env->prog->aux->tail_call_reachable = true; 6866 6867 /* end of for() loop means the last insn of the 'subprog' 6868 * was reached. 
Doesn't matter whether it was JA or EXIT 6869 */ 6870 if (frame == 0) 6871 return 0; 6872 if (subprog[idx].priv_stack_mode != PRIV_STACK_ADAPTIVE) 6873 depth -= round_up_stack_depth(env, subprog[idx].stack_depth); 6874 frame--; 6875 i = ret_insn[frame]; 6876 idx = ret_prog[frame]; 6877 goto continue_func; 6878 } 6879 6880 static int check_max_stack_depth(struct bpf_verifier_env *env) 6881 { 6882 enum priv_stack_mode priv_stack_mode = PRIV_STACK_UNKNOWN; 6883 struct bpf_subprog_info *si = env->subprog_info; 6884 bool priv_stack_supported; 6885 int ret; 6886 6887 for (int i = 0; i < env->subprog_cnt; i++) { 6888 if (si[i].has_tail_call) { 6889 priv_stack_mode = NO_PRIV_STACK; 6890 break; 6891 } 6892 } 6893 6894 if (priv_stack_mode == PRIV_STACK_UNKNOWN) 6895 priv_stack_mode = bpf_enable_priv_stack(env->prog); 6896 6897 /* All async_cb subprogs use normal kernel stack. If a particular 6898 * subprog appears in both main prog and async_cb subtree, that 6899 * subprog will use normal kernel stack to avoid potential nesting. 6900 * The reverse subprog traversal ensures when main prog subtree is 6901 * checked, the subprogs appearing in async_cb subtrees are already 6902 * marked as using normal kernel stack, so stack size checking can 6903 * be done properly. 
6904 */ 6905 for (int i = env->subprog_cnt - 1; i >= 0; i--) { 6906 if (!i || si[i].is_async_cb) { 6907 priv_stack_supported = !i && priv_stack_mode == PRIV_STACK_ADAPTIVE; 6908 ret = check_max_stack_depth_subprog(env, i, priv_stack_supported); 6909 if (ret < 0) 6910 return ret; 6911 } 6912 } 6913 6914 for (int i = 0; i < env->subprog_cnt; i++) { 6915 if (si[i].priv_stack_mode == PRIV_STACK_ADAPTIVE) { 6916 env->prog->aux->jits_use_priv_stack = true; 6917 break; 6918 } 6919 } 6920 6921 return 0; 6922 } 6923 6924 #ifndef CONFIG_BPF_JIT_ALWAYS_ON 6925 static int get_callee_stack_depth(struct bpf_verifier_env *env, 6926 const struct bpf_insn *insn, int idx) 6927 { 6928 int start = idx + insn->imm + 1, subprog; 6929 6930 subprog = find_subprog(env, start); 6931 if (verifier_bug_if(subprog < 0, env, "get stack depth: no program at insn %d", start)) 6932 return -EFAULT; 6933 return env->subprog_info[subprog].stack_depth; 6934 } 6935 #endif 6936 6937 static int __check_buffer_access(struct bpf_verifier_env *env, 6938 const char *buf_info, 6939 const struct bpf_reg_state *reg, 6940 int regno, int off, int size) 6941 { 6942 if (off < 0) { 6943 verbose(env, 6944 "R%d invalid %s buffer access: off=%d, size=%d\n", 6945 regno, buf_info, off, size); 6946 return -EACCES; 6947 } 6948 if (!tnum_is_const(reg->var_off) || reg->var_off.value) { 6949 char tn_buf[48]; 6950 6951 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); 6952 verbose(env, 6953 "R%d invalid variable buffer offset: off=%d, var_off=%s\n", 6954 regno, off, tn_buf); 6955 return -EACCES; 6956 } 6957 6958 return 0; 6959 } 6960 6961 static int check_tp_buffer_access(struct bpf_verifier_env *env, 6962 const struct bpf_reg_state *reg, 6963 int regno, int off, int size) 6964 { 6965 int err; 6966 6967 err = __check_buffer_access(env, "tracepoint", reg, regno, off, size); 6968 if (err) 6969 return err; 6970 6971 if (off + size > env->prog->aux->max_tp_access) 6972 env->prog->aux->max_tp_access = off + size; 6973 6974 return 0; 
6975 } 6976 6977 static int check_buffer_access(struct bpf_verifier_env *env, 6978 const struct bpf_reg_state *reg, 6979 int regno, int off, int size, 6980 bool zero_size_allowed, 6981 u32 *max_access) 6982 { 6983 const char *buf_info = type_is_rdonly_mem(reg->type) ? "rdonly" : "rdwr"; 6984 int err; 6985 6986 err = __check_buffer_access(env, buf_info, reg, regno, off, size); 6987 if (err) 6988 return err; 6989 6990 if (off + size > *max_access) 6991 *max_access = off + size; 6992 6993 return 0; 6994 } 6995 6996 /* BPF architecture zero extends alu32 ops into 64-bit registesr */ 6997 static void zext_32_to_64(struct bpf_reg_state *reg) 6998 { 6999 reg->var_off = tnum_subreg(reg->var_off); 7000 __reg_assign_32_into_64(reg); 7001 } 7002 7003 /* truncate register to smaller size (in bytes) 7004 * must be called with size < BPF_REG_SIZE 7005 */ 7006 static void coerce_reg_to_size(struct bpf_reg_state *reg, int size) 7007 { 7008 u64 mask; 7009 7010 /* clear high bits in bit representation */ 7011 reg->var_off = tnum_cast(reg->var_off, size); 7012 7013 /* fix arithmetic bounds */ 7014 mask = ((u64)1 << (size * 8)) - 1; 7015 if ((reg->umin_value & ~mask) == (reg->umax_value & ~mask)) { 7016 reg->umin_value &= mask; 7017 reg->umax_value &= mask; 7018 } else { 7019 reg->umin_value = 0; 7020 reg->umax_value = mask; 7021 } 7022 reg->smin_value = reg->umin_value; 7023 reg->smax_value = reg->umax_value; 7024 7025 /* If size is smaller than 32bit register the 32bit register 7026 * values are also truncated so we push 64-bit bounds into 7027 * 32-bit bounds. Above were truncated < 32-bits already. 
7028 */ 7029 if (size < 4) 7030 __mark_reg32_unbounded(reg); 7031 7032 reg_bounds_sync(reg); 7033 } 7034 7035 static void set_sext64_default_val(struct bpf_reg_state *reg, int size) 7036 { 7037 if (size == 1) { 7038 reg->smin_value = reg->s32_min_value = S8_MIN; 7039 reg->smax_value = reg->s32_max_value = S8_MAX; 7040 } else if (size == 2) { 7041 reg->smin_value = reg->s32_min_value = S16_MIN; 7042 reg->smax_value = reg->s32_max_value = S16_MAX; 7043 } else { 7044 /* size == 4 */ 7045 reg->smin_value = reg->s32_min_value = S32_MIN; 7046 reg->smax_value = reg->s32_max_value = S32_MAX; 7047 } 7048 reg->umin_value = reg->u32_min_value = 0; 7049 reg->umax_value = U64_MAX; 7050 reg->u32_max_value = U32_MAX; 7051 reg->var_off = tnum_unknown; 7052 } 7053 7054 static void coerce_reg_to_size_sx(struct bpf_reg_state *reg, int size) 7055 { 7056 s64 init_s64_max, init_s64_min, s64_max, s64_min, u64_cval; 7057 u64 top_smax_value, top_smin_value; 7058 u64 num_bits = size * 8; 7059 7060 if (tnum_is_const(reg->var_off)) { 7061 u64_cval = reg->var_off.value; 7062 if (size == 1) 7063 reg->var_off = tnum_const((s8)u64_cval); 7064 else if (size == 2) 7065 reg->var_off = tnum_const((s16)u64_cval); 7066 else 7067 /* size == 4 */ 7068 reg->var_off = tnum_const((s32)u64_cval); 7069 7070 u64_cval = reg->var_off.value; 7071 reg->smax_value = reg->smin_value = u64_cval; 7072 reg->umax_value = reg->umin_value = u64_cval; 7073 reg->s32_max_value = reg->s32_min_value = u64_cval; 7074 reg->u32_max_value = reg->u32_min_value = u64_cval; 7075 return; 7076 } 7077 7078 top_smax_value = ((u64)reg->smax_value >> num_bits) << num_bits; 7079 top_smin_value = ((u64)reg->smin_value >> num_bits) << num_bits; 7080 7081 if (top_smax_value != top_smin_value) 7082 goto out; 7083 7084 /* find the s64_min and s64_min after sign extension */ 7085 if (size == 1) { 7086 init_s64_max = (s8)reg->smax_value; 7087 init_s64_min = (s8)reg->smin_value; 7088 } else if (size == 2) { 7089 init_s64_max = (s16)reg->smax_value; 
7090 init_s64_min = (s16)reg->smin_value; 7091 } else { 7092 init_s64_max = (s32)reg->smax_value; 7093 init_s64_min = (s32)reg->smin_value; 7094 } 7095 7096 s64_max = max(init_s64_max, init_s64_min); 7097 s64_min = min(init_s64_max, init_s64_min); 7098 7099 /* both of s64_max/s64_min positive or negative */ 7100 if ((s64_max >= 0) == (s64_min >= 0)) { 7101 reg->s32_min_value = reg->smin_value = s64_min; 7102 reg->s32_max_value = reg->smax_value = s64_max; 7103 reg->u32_min_value = reg->umin_value = s64_min; 7104 reg->u32_max_value = reg->umax_value = s64_max; 7105 reg->var_off = tnum_range(s64_min, s64_max); 7106 return; 7107 } 7108 7109 out: 7110 set_sext64_default_val(reg, size); 7111 } 7112 7113 static void set_sext32_default_val(struct bpf_reg_state *reg, int size) 7114 { 7115 if (size == 1) { 7116 reg->s32_min_value = S8_MIN; 7117 reg->s32_max_value = S8_MAX; 7118 } else { 7119 /* size == 2 */ 7120 reg->s32_min_value = S16_MIN; 7121 reg->s32_max_value = S16_MAX; 7122 } 7123 reg->u32_min_value = 0; 7124 reg->u32_max_value = U32_MAX; 7125 reg->var_off = tnum_subreg(tnum_unknown); 7126 } 7127 7128 static void coerce_subreg_to_size_sx(struct bpf_reg_state *reg, int size) 7129 { 7130 s32 init_s32_max, init_s32_min, s32_max, s32_min, u32_val; 7131 u32 top_smax_value, top_smin_value; 7132 u32 num_bits = size * 8; 7133 7134 if (tnum_is_const(reg->var_off)) { 7135 u32_val = reg->var_off.value; 7136 if (size == 1) 7137 reg->var_off = tnum_const((s8)u32_val); 7138 else 7139 reg->var_off = tnum_const((s16)u32_val); 7140 7141 u32_val = reg->var_off.value; 7142 reg->s32_min_value = reg->s32_max_value = u32_val; 7143 reg->u32_min_value = reg->u32_max_value = u32_val; 7144 return; 7145 } 7146 7147 top_smax_value = ((u32)reg->s32_max_value >> num_bits) << num_bits; 7148 top_smin_value = ((u32)reg->s32_min_value >> num_bits) << num_bits; 7149 7150 if (top_smax_value != top_smin_value) 7151 goto out; 7152 7153 /* find the s32_min and s32_min after sign extension */ 7154 if (size 
== 1) { 7155 init_s32_max = (s8)reg->s32_max_value; 7156 init_s32_min = (s8)reg->s32_min_value; 7157 } else { 7158 /* size == 2 */ 7159 init_s32_max = (s16)reg->s32_max_value; 7160 init_s32_min = (s16)reg->s32_min_value; 7161 } 7162 s32_max = max(init_s32_max, init_s32_min); 7163 s32_min = min(init_s32_max, init_s32_min); 7164 7165 if ((s32_min >= 0) == (s32_max >= 0)) { 7166 reg->s32_min_value = s32_min; 7167 reg->s32_max_value = s32_max; 7168 reg->u32_min_value = (u32)s32_min; 7169 reg->u32_max_value = (u32)s32_max; 7170 reg->var_off = tnum_subreg(tnum_range(s32_min, s32_max)); 7171 return; 7172 } 7173 7174 out: 7175 set_sext32_default_val(reg, size); 7176 } 7177 7178 static bool bpf_map_is_rdonly(const struct bpf_map *map) 7179 { 7180 /* A map is considered read-only if the following condition are true: 7181 * 7182 * 1) BPF program side cannot change any of the map content. The 7183 * BPF_F_RDONLY_PROG flag is throughout the lifetime of a map 7184 * and was set at map creation time. 7185 * 2) The map value(s) have been initialized from user space by a 7186 * loader and then "frozen", such that no new map update/delete 7187 * operations from syscall side are possible for the rest of 7188 * the map's lifetime from that point onwards. 7189 * 3) Any parallel/pending map update/delete operations from syscall 7190 * side have been completed. Only after that point, it's safe to 7191 * assume that map value(s) are immutable. 7192 */ 7193 return (map->map_flags & BPF_F_RDONLY_PROG) && 7194 READ_ONCE(map->frozen) && 7195 !bpf_map_write_active(map); 7196 } 7197 7198 static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val, 7199 bool is_ldsx) 7200 { 7201 void *ptr; 7202 u64 addr; 7203 int err; 7204 7205 err = map->ops->map_direct_value_addr(map, &addr, off); 7206 if (err) 7207 return err; 7208 ptr = (void *)(long)addr + off; 7209 7210 switch (size) { 7211 case sizeof(u8): 7212 *val = is_ldsx ? 
(s64)*(s8 *)ptr : (u64)*(u8 *)ptr; 7213 break; 7214 case sizeof(u16): 7215 *val = is_ldsx ? (s64)*(s16 *)ptr : (u64)*(u16 *)ptr; 7216 break; 7217 case sizeof(u32): 7218 *val = is_ldsx ? (s64)*(s32 *)ptr : (u64)*(u32 *)ptr; 7219 break; 7220 case sizeof(u64): 7221 *val = *(u64 *)ptr; 7222 break; 7223 default: 7224 return -EINVAL; 7225 } 7226 return 0; 7227 } 7228 7229 #define BTF_TYPE_SAFE_RCU(__type) __PASTE(__type, __safe_rcu) 7230 #define BTF_TYPE_SAFE_RCU_OR_NULL(__type) __PASTE(__type, __safe_rcu_or_null) 7231 #define BTF_TYPE_SAFE_TRUSTED(__type) __PASTE(__type, __safe_trusted) 7232 #define BTF_TYPE_SAFE_TRUSTED_OR_NULL(__type) __PASTE(__type, __safe_trusted_or_null) 7233 7234 /* 7235 * Allow list few fields as RCU trusted or full trusted. 7236 * This logic doesn't allow mix tagging and will be removed once GCC supports 7237 * btf_type_tag. 7238 */ 7239 7240 /* RCU trusted: these fields are trusted in RCU CS and never NULL */ 7241 BTF_TYPE_SAFE_RCU(struct task_struct) { 7242 const cpumask_t *cpus_ptr; 7243 struct css_set __rcu *cgroups; 7244 struct task_struct __rcu *real_parent; 7245 struct task_struct *group_leader; 7246 }; 7247 7248 BTF_TYPE_SAFE_RCU(struct cgroup) { 7249 /* cgrp->kn is always accessible as documented in kernel/cgroup/cgroup.c */ 7250 struct kernfs_node *kn; 7251 }; 7252 7253 BTF_TYPE_SAFE_RCU(struct css_set) { 7254 struct cgroup *dfl_cgrp; 7255 }; 7256 7257 BTF_TYPE_SAFE_RCU(struct cgroup_subsys_state) { 7258 struct cgroup *cgroup; 7259 }; 7260 7261 /* RCU trusted: these fields are trusted in RCU CS and can be NULL */ 7262 BTF_TYPE_SAFE_RCU_OR_NULL(struct mm_struct) { 7263 struct file __rcu *exe_file; 7264 #ifdef CONFIG_MEMCG 7265 struct task_struct __rcu *owner; 7266 #endif 7267 }; 7268 7269 /* skb->sk, req->sk are not RCU protected, but we mark them as such 7270 * because bpf prog accessible sockets are SOCK_RCU_FREE. 
7271 */ 7272 BTF_TYPE_SAFE_RCU_OR_NULL(struct sk_buff) { 7273 struct sock *sk; 7274 }; 7275 7276 BTF_TYPE_SAFE_RCU_OR_NULL(struct request_sock) { 7277 struct sock *sk; 7278 }; 7279 7280 /* full trusted: these fields are trusted even outside of RCU CS and never NULL */ 7281 BTF_TYPE_SAFE_TRUSTED(struct bpf_iter_meta) { 7282 struct seq_file *seq; 7283 }; 7284 7285 BTF_TYPE_SAFE_TRUSTED(struct bpf_iter__task) { 7286 struct bpf_iter_meta *meta; 7287 struct task_struct *task; 7288 }; 7289 7290 BTF_TYPE_SAFE_TRUSTED(struct linux_binprm) { 7291 struct file *file; 7292 }; 7293 7294 BTF_TYPE_SAFE_TRUSTED(struct file) { 7295 struct inode *f_inode; 7296 }; 7297 7298 BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct dentry) { 7299 struct inode *d_inode; 7300 }; 7301 7302 BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct socket) { 7303 struct sock *sk; 7304 }; 7305 7306 BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct vm_area_struct) { 7307 struct mm_struct *vm_mm; 7308 struct file *vm_file; 7309 }; 7310 7311 static bool type_is_rcu(struct bpf_verifier_env *env, 7312 struct bpf_reg_state *reg, 7313 const char *field_name, u32 btf_id) 7314 { 7315 BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct task_struct)); 7316 BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct cgroup)); 7317 BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct css_set)); 7318 BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct cgroup_subsys_state)); 7319 7320 return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id, "__safe_rcu"); 7321 } 7322 7323 static bool type_is_rcu_or_null(struct bpf_verifier_env *env, 7324 struct bpf_reg_state *reg, 7325 const char *field_name, u32 btf_id) 7326 { 7327 BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU_OR_NULL(struct mm_struct)); 7328 BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU_OR_NULL(struct sk_buff)); 7329 BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU_OR_NULL(struct request_sock)); 7330 7331 return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id, "__safe_rcu_or_null"); 7332 } 7333 7334 static bool type_is_trusted(struct bpf_verifier_env *env, 7335 struct bpf_reg_state 
*reg, 7336 const char *field_name, u32 btf_id) 7337 { 7338 BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct bpf_iter_meta)); 7339 BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct bpf_iter__task)); 7340 BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct linux_binprm)); 7341 BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct file)); 7342 7343 return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id, "__safe_trusted"); 7344 } 7345 7346 static bool type_is_trusted_or_null(struct bpf_verifier_env *env, 7347 struct bpf_reg_state *reg, 7348 const char *field_name, u32 btf_id) 7349 { 7350 BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct socket)); 7351 BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct dentry)); 7352 BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct vm_area_struct)); 7353 7354 return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id, 7355 "__safe_trusted_or_null"); 7356 } 7357 7358 static int check_ptr_to_btf_access(struct bpf_verifier_env *env, 7359 struct bpf_reg_state *regs, 7360 int regno, int off, int size, 7361 enum bpf_access_type atype, 7362 int value_regno) 7363 { 7364 struct bpf_reg_state *reg = regs + regno; 7365 const struct btf_type *t = btf_type_by_id(reg->btf, reg->btf_id); 7366 const char *tname = btf_name_by_offset(reg->btf, t->name_off); 7367 const char *field_name = NULL; 7368 enum bpf_type_flag flag = 0; 7369 u32 btf_id = 0; 7370 int ret; 7371 7372 if (!env->allow_ptr_leaks) { 7373 verbose(env, 7374 "'struct %s' access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n", 7375 tname); 7376 return -EPERM; 7377 } 7378 if (!env->prog->gpl_compatible && btf_is_kernel(reg->btf)) { 7379 verbose(env, 7380 "Cannot access kernel 'struct %s' from non-GPL compatible program\n", 7381 tname); 7382 return -EINVAL; 7383 } 7384 if (off < 0) { 7385 verbose(env, 7386 "R%d is ptr_%s invalid negative access: off=%d\n", 7387 regno, tname, off); 7388 return -EACCES; 7389 } 7390 if (!tnum_is_const(reg->var_off) || reg->var_off.value) { 7391 char tn_buf[48]; 7392 
7393 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); 7394 verbose(env, 7395 "R%d is ptr_%s invalid variable offset: off=%d, var_off=%s\n", 7396 regno, tname, off, tn_buf); 7397 return -EACCES; 7398 } 7399 7400 if (reg->type & MEM_USER) { 7401 verbose(env, 7402 "R%d is ptr_%s access user memory: off=%d\n", 7403 regno, tname, off); 7404 return -EACCES; 7405 } 7406 7407 if (reg->type & MEM_PERCPU) { 7408 verbose(env, 7409 "R%d is ptr_%s access percpu memory: off=%d\n", 7410 regno, tname, off); 7411 return -EACCES; 7412 } 7413 7414 if (env->ops->btf_struct_access && !type_is_alloc(reg->type) && atype == BPF_WRITE) { 7415 if (!btf_is_kernel(reg->btf)) { 7416 verifier_bug(env, "reg->btf must be kernel btf"); 7417 return -EFAULT; 7418 } 7419 ret = env->ops->btf_struct_access(&env->log, reg, off, size); 7420 } else { 7421 /* Writes are permitted with default btf_struct_access for 7422 * program allocated objects (which always have ref_obj_id > 0), 7423 * but not for untrusted PTR_TO_BTF_ID | MEM_ALLOC. 7424 */ 7425 if (atype != BPF_READ && !type_is_ptr_alloc_obj(reg->type)) { 7426 verbose(env, "only read is supported\n"); 7427 return -EACCES; 7428 } 7429 7430 if (type_is_alloc(reg->type) && !type_is_non_owning_ref(reg->type) && 7431 !(reg->type & MEM_RCU) && !reg->ref_obj_id) { 7432 verifier_bug(env, "ref_obj_id for allocated object must be non-zero"); 7433 return -EFAULT; 7434 } 7435 7436 ret = btf_struct_access(&env->log, reg, off, size, atype, &btf_id, &flag, &field_name); 7437 } 7438 7439 if (ret < 0) 7440 return ret; 7441 7442 if (ret != PTR_TO_BTF_ID) { 7443 /* just mark; */ 7444 7445 } else if (type_flag(reg->type) & PTR_UNTRUSTED) { 7446 /* If this is an untrusted pointer, all pointers formed by walking it 7447 * also inherit the untrusted flag. 
7448 */ 7449 flag = PTR_UNTRUSTED; 7450 7451 } else if (is_trusted_reg(reg) || is_rcu_reg(reg)) { 7452 /* By default any pointer obtained from walking a trusted pointer is no 7453 * longer trusted, unless the field being accessed has explicitly been 7454 * marked as inheriting its parent's state of trust (either full or RCU). 7455 * For example: 7456 * 'cgroups' pointer is untrusted if task->cgroups dereference 7457 * happened in a sleepable program outside of bpf_rcu_read_lock() 7458 * section. In a non-sleepable program it's trusted while in RCU CS (aka MEM_RCU). 7459 * Note bpf_rcu_read_unlock() converts MEM_RCU pointers to PTR_UNTRUSTED. 7460 * 7461 * A regular RCU-protected pointer with __rcu tag can also be deemed 7462 * trusted if we are in an RCU CS. Such pointer can be NULL. 7463 */ 7464 if (type_is_trusted(env, reg, field_name, btf_id)) { 7465 flag |= PTR_TRUSTED; 7466 } else if (type_is_trusted_or_null(env, reg, field_name, btf_id)) { 7467 flag |= PTR_TRUSTED | PTR_MAYBE_NULL; 7468 } else if (in_rcu_cs(env) && !type_may_be_null(reg->type)) { 7469 if (type_is_rcu(env, reg, field_name, btf_id)) { 7470 /* ignore __rcu tag and mark it MEM_RCU */ 7471 flag |= MEM_RCU; 7472 } else if (flag & MEM_RCU || 7473 type_is_rcu_or_null(env, reg, field_name, btf_id)) { 7474 /* __rcu tagged pointers can be NULL */ 7475 flag |= MEM_RCU | PTR_MAYBE_NULL; 7476 7477 /* We always trust them */ 7478 if (type_is_rcu_or_null(env, reg, field_name, btf_id) && 7479 flag & PTR_UNTRUSTED) 7480 flag &= ~PTR_UNTRUSTED; 7481 } else if (flag & (MEM_PERCPU | MEM_USER)) { 7482 /* keep as-is */ 7483 } else { 7484 /* walking unknown pointers yields old deprecated PTR_TO_BTF_ID */ 7485 clear_trusted_flags(&flag); 7486 } 7487 } else { 7488 /* 7489 * If not in RCU CS or MEM_RCU pointer can be NULL then 7490 * aggressively mark as untrusted otherwise such 7491 * pointers will be plain PTR_TO_BTF_ID without flags 7492 * and will be allowed to be passed into helpers for 7493 * compat reasons. 
7494 */ 7495 flag = PTR_UNTRUSTED; 7496 } 7497 } else { 7498 /* Old compat. Deprecated */ 7499 clear_trusted_flags(&flag); 7500 } 7501 7502 if (atype == BPF_READ && value_regno >= 0) { 7503 ret = mark_btf_ld_reg(env, regs, value_regno, ret, reg->btf, btf_id, flag); 7504 if (ret < 0) 7505 return ret; 7506 } 7507 7508 return 0; 7509 } 7510 7511 static int check_ptr_to_map_access(struct bpf_verifier_env *env, 7512 struct bpf_reg_state *regs, 7513 int regno, int off, int size, 7514 enum bpf_access_type atype, 7515 int value_regno) 7516 { 7517 struct bpf_reg_state *reg = regs + regno; 7518 struct bpf_map *map = reg->map_ptr; 7519 struct bpf_reg_state map_reg; 7520 enum bpf_type_flag flag = 0; 7521 const struct btf_type *t; 7522 const char *tname; 7523 u32 btf_id; 7524 int ret; 7525 7526 if (!btf_vmlinux) { 7527 verbose(env, "map_ptr access not supported without CONFIG_DEBUG_INFO_BTF\n"); 7528 return -ENOTSUPP; 7529 } 7530 7531 if (!map->ops->map_btf_id || !*map->ops->map_btf_id) { 7532 verbose(env, "map_ptr access not supported for map type %d\n", 7533 map->map_type); 7534 return -ENOTSUPP; 7535 } 7536 7537 t = btf_type_by_id(btf_vmlinux, *map->ops->map_btf_id); 7538 tname = btf_name_by_offset(btf_vmlinux, t->name_off); 7539 7540 if (!env->allow_ptr_leaks) { 7541 verbose(env, 7542 "'struct %s' access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n", 7543 tname); 7544 return -EPERM; 7545 } 7546 7547 if (off < 0) { 7548 verbose(env, "R%d is %s invalid negative access: off=%d\n", 7549 regno, tname, off); 7550 return -EACCES; 7551 } 7552 7553 if (atype != BPF_READ) { 7554 verbose(env, "only read from %s is supported\n", tname); 7555 return -EACCES; 7556 } 7557 7558 /* Simulate access to a PTR_TO_BTF_ID */ 7559 memset(&map_reg, 0, sizeof(map_reg)); 7560 ret = mark_btf_ld_reg(env, &map_reg, 0, PTR_TO_BTF_ID, 7561 btf_vmlinux, *map->ops->map_btf_id, 0); 7562 if (ret < 0) 7563 return ret; 7564 ret = btf_struct_access(&env->log, &map_reg, off, size, atype, &btf_id, &flag, 
NULL); 7565 if (ret < 0) 7566 return ret; 7567 7568 if (value_regno >= 0) { 7569 ret = mark_btf_ld_reg(env, regs, value_regno, ret, btf_vmlinux, btf_id, flag); 7570 if (ret < 0) 7571 return ret; 7572 } 7573 7574 return 0; 7575 } 7576 7577 /* Check that the stack access at the given offset is within bounds. The 7578 * maximum valid offset is -1. 7579 * 7580 * The minimum valid offset is -MAX_BPF_STACK for writes, and 7581 * -state->allocated_stack for reads. 7582 */ 7583 static int check_stack_slot_within_bounds(struct bpf_verifier_env *env, 7584 s64 off, 7585 struct bpf_func_state *state, 7586 enum bpf_access_type t) 7587 { 7588 int min_valid_off; 7589 7590 if (t == BPF_WRITE || env->allow_uninit_stack) 7591 min_valid_off = -MAX_BPF_STACK; 7592 else 7593 min_valid_off = -state->allocated_stack; 7594 7595 if (off < min_valid_off || off > -1) 7596 return -EACCES; 7597 return 0; 7598 } 7599 7600 /* Check that the stack access at 'regno + off' falls within the maximum stack 7601 * bounds. 7602 * 7603 * 'off' includes `regno->offset`, but not its dynamic part (if any). 
7604 */ 7605 static int check_stack_access_within_bounds( 7606 struct bpf_verifier_env *env, 7607 int regno, int off, int access_size, 7608 enum bpf_access_type type) 7609 { 7610 struct bpf_reg_state *reg = reg_state(env, regno); 7611 struct bpf_func_state *state = func(env, reg); 7612 s64 min_off, max_off; 7613 int err; 7614 char *err_extra; 7615 7616 if (type == BPF_READ) 7617 err_extra = " read from"; 7618 else 7619 err_extra = " write to"; 7620 7621 if (tnum_is_const(reg->var_off)) { 7622 min_off = (s64)reg->var_off.value + off; 7623 max_off = min_off + access_size; 7624 } else { 7625 if (reg->smax_value >= BPF_MAX_VAR_OFF || 7626 reg->smin_value <= -BPF_MAX_VAR_OFF) { 7627 verbose(env, "invalid unbounded variable-offset%s stack R%d\n", 7628 err_extra, regno); 7629 return -EACCES; 7630 } 7631 min_off = reg->smin_value + off; 7632 max_off = reg->smax_value + off + access_size; 7633 } 7634 7635 err = check_stack_slot_within_bounds(env, min_off, state, type); 7636 if (!err && max_off > 0) 7637 err = -EINVAL; /* out of stack access into non-negative offsets */ 7638 if (!err && access_size < 0) 7639 /* access_size should not be negative (or overflow an int); others checks 7640 * along the way should have prevented such an access. 7641 */ 7642 err = -EFAULT; /* invalid negative access size; integer overflow? */ 7643 7644 if (err) { 7645 if (tnum_is_const(reg->var_off)) { 7646 verbose(env, "invalid%s stack R%d off=%d size=%d\n", 7647 err_extra, regno, off, access_size); 7648 } else { 7649 char tn_buf[48]; 7650 7651 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); 7652 verbose(env, "invalid variable-offset%s stack R%d var_off=%s off=%d size=%d\n", 7653 err_extra, regno, tn_buf, off, access_size); 7654 } 7655 return err; 7656 } 7657 7658 /* Note that there is no stack access with offset zero, so the needed stack 7659 * size is -min_off, not -min_off+1. 
7660 */ 7661 return grow_stack_state(env, state, -min_off /* size */); 7662 } 7663 7664 static bool get_func_retval_range(struct bpf_prog *prog, 7665 struct bpf_retval_range *range) 7666 { 7667 if (prog->type == BPF_PROG_TYPE_LSM && 7668 prog->expected_attach_type == BPF_LSM_MAC && 7669 !bpf_lsm_get_retval_range(prog, range)) { 7670 return true; 7671 } 7672 return false; 7673 } 7674 7675 /* check whether memory at (regno + off) is accessible for t = (read | write) 7676 * if t==write, value_regno is a register which value is stored into memory 7677 * if t==read, value_regno is a register which will receive the value from memory 7678 * if t==write && value_regno==-1, some unknown value is stored into memory 7679 * if t==read && value_regno==-1, don't care what we read from memory 7680 */ 7681 static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno, 7682 int off, int bpf_size, enum bpf_access_type t, 7683 int value_regno, bool strict_alignment_once, bool is_ldsx) 7684 { 7685 struct bpf_reg_state *regs = cur_regs(env); 7686 struct bpf_reg_state *reg = regs + regno; 7687 int size, err = 0; 7688 7689 size = bpf_size_to_bytes(bpf_size); 7690 if (size < 0) 7691 return size; 7692 7693 /* alignment checks will add in reg->off themselves */ 7694 err = check_ptr_alignment(env, reg, off, size, strict_alignment_once); 7695 if (err) 7696 return err; 7697 7698 /* for access checks, reg->off is just part of off */ 7699 off += reg->off; 7700 7701 if (reg->type == PTR_TO_MAP_KEY) { 7702 if (t == BPF_WRITE) { 7703 verbose(env, "write to change key R%d not allowed\n", regno); 7704 return -EACCES; 7705 } 7706 7707 err = check_mem_region_access(env, regno, off, size, 7708 reg->map_ptr->key_size, false); 7709 if (err) 7710 return err; 7711 if (value_regno >= 0) 7712 mark_reg_unknown(env, regs, value_regno); 7713 } else if (reg->type == PTR_TO_MAP_VALUE) { 7714 struct btf_field *kptr_field = NULL; 7715 7716 if (t == BPF_WRITE && value_regno >= 0 && 7717 
is_pointer_value(env, value_regno)) { 7718 verbose(env, "R%d leaks addr into map\n", value_regno); 7719 return -EACCES; 7720 } 7721 err = check_map_access_type(env, regno, off, size, t); 7722 if (err) 7723 return err; 7724 err = check_map_access(env, regno, off, size, false, ACCESS_DIRECT); 7725 if (err) 7726 return err; 7727 if (tnum_is_const(reg->var_off)) 7728 kptr_field = btf_record_find(reg->map_ptr->record, 7729 off + reg->var_off.value, BPF_KPTR | BPF_UPTR); 7730 if (kptr_field) { 7731 err = check_map_kptr_access(env, regno, value_regno, insn_idx, kptr_field); 7732 } else if (t == BPF_READ && value_regno >= 0) { 7733 struct bpf_map *map = reg->map_ptr; 7734 7735 /* 7736 * If map is read-only, track its contents as scalars, 7737 * unless it is an insn array (see the special case below) 7738 */ 7739 if (tnum_is_const(reg->var_off) && 7740 bpf_map_is_rdonly(map) && 7741 map->ops->map_direct_value_addr && 7742 map->map_type != BPF_MAP_TYPE_INSN_ARRAY) { 7743 int map_off = off + reg->var_off.value; 7744 u64 val = 0; 7745 7746 err = bpf_map_direct_read(map, map_off, size, 7747 &val, is_ldsx); 7748 if (err) 7749 return err; 7750 7751 regs[value_regno].type = SCALAR_VALUE; 7752 __mark_reg_known(®s[value_regno], val); 7753 } else if (map->map_type == BPF_MAP_TYPE_INSN_ARRAY) { 7754 if (bpf_size != BPF_DW) { 7755 verbose(env, "Invalid read of %d bytes from insn_array\n", 7756 size); 7757 return -EACCES; 7758 } 7759 copy_register_state(®s[value_regno], reg); 7760 regs[value_regno].type = PTR_TO_INSN; 7761 } else { 7762 mark_reg_unknown(env, regs, value_regno); 7763 } 7764 } 7765 } else if (base_type(reg->type) == PTR_TO_MEM) { 7766 bool rdonly_mem = type_is_rdonly_mem(reg->type); 7767 bool rdonly_untrusted = rdonly_mem && (reg->type & PTR_UNTRUSTED); 7768 7769 if (type_may_be_null(reg->type)) { 7770 verbose(env, "R%d invalid mem access '%s'\n", regno, 7771 reg_type_str(env, reg->type)); 7772 return -EACCES; 7773 } 7774 7775 if (t == BPF_WRITE && rdonly_mem) { 7776 
verbose(env, "R%d cannot write into %s\n", 7777 regno, reg_type_str(env, reg->type)); 7778 return -EACCES; 7779 } 7780 7781 if (t == BPF_WRITE && value_regno >= 0 && 7782 is_pointer_value(env, value_regno)) { 7783 verbose(env, "R%d leaks addr into mem\n", value_regno); 7784 return -EACCES; 7785 } 7786 7787 /* 7788 * Accesses to untrusted PTR_TO_MEM are done through probe 7789 * instructions, hence no need to check bounds in that case. 7790 */ 7791 if (!rdonly_untrusted) 7792 err = check_mem_region_access(env, regno, off, size, 7793 reg->mem_size, false); 7794 if (!err && value_regno >= 0 && (t == BPF_READ || rdonly_mem)) 7795 mark_reg_unknown(env, regs, value_regno); 7796 } else if (reg->type == PTR_TO_CTX) { 7797 struct bpf_retval_range range; 7798 struct bpf_insn_access_aux info = { 7799 .reg_type = SCALAR_VALUE, 7800 .is_ldsx = is_ldsx, 7801 .log = &env->log, 7802 }; 7803 7804 if (t == BPF_WRITE && value_regno >= 0 && 7805 is_pointer_value(env, value_regno)) { 7806 verbose(env, "R%d leaks addr into ctx\n", value_regno); 7807 return -EACCES; 7808 } 7809 7810 err = check_ptr_off_reg(env, reg, regno); 7811 if (err < 0) 7812 return err; 7813 7814 err = check_ctx_access(env, insn_idx, off, size, t, &info); 7815 if (err) 7816 verbose_linfo(env, insn_idx, "; "); 7817 if (!err && t == BPF_READ && value_regno >= 0) { 7818 /* ctx access returns either a scalar, or a 7819 * PTR_TO_PACKET[_META,_END]. In the latter 7820 * case, we know the offset is zero. 
7821 */ 7822 if (info.reg_type == SCALAR_VALUE) { 7823 if (info.is_retval && get_func_retval_range(env->prog, &range)) { 7824 err = __mark_reg_s32_range(env, regs, value_regno, 7825 range.minval, range.maxval); 7826 if (err) 7827 return err; 7828 } else { 7829 mark_reg_unknown(env, regs, value_regno); 7830 } 7831 } else { 7832 mark_reg_known_zero(env, regs, 7833 value_regno); 7834 if (type_may_be_null(info.reg_type)) 7835 regs[value_regno].id = ++env->id_gen; 7836 /* A load of ctx field could have different 7837 * actual load size with the one encoded in the 7838 * insn. When the dst is PTR, it is for sure not 7839 * a sub-register. 7840 */ 7841 regs[value_regno].subreg_def = DEF_NOT_SUBREG; 7842 if (base_type(info.reg_type) == PTR_TO_BTF_ID) { 7843 regs[value_regno].btf = info.btf; 7844 regs[value_regno].btf_id = info.btf_id; 7845 regs[value_regno].ref_obj_id = info.ref_obj_id; 7846 } 7847 } 7848 regs[value_regno].type = info.reg_type; 7849 } 7850 7851 } else if (reg->type == PTR_TO_STACK) { 7852 /* Basic bounds checks. 
*/ 7853 err = check_stack_access_within_bounds(env, regno, off, size, t); 7854 if (err) 7855 return err; 7856 7857 if (t == BPF_READ) 7858 err = check_stack_read(env, regno, off, size, 7859 value_regno); 7860 else 7861 err = check_stack_write(env, regno, off, size, 7862 value_regno, insn_idx); 7863 } else if (reg_is_pkt_pointer(reg)) { 7864 if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) { 7865 verbose(env, "cannot write into packet\n"); 7866 return -EACCES; 7867 } 7868 if (t == BPF_WRITE && value_regno >= 0 && 7869 is_pointer_value(env, value_regno)) { 7870 verbose(env, "R%d leaks addr into packet\n", 7871 value_regno); 7872 return -EACCES; 7873 } 7874 err = check_packet_access(env, regno, off, size, false); 7875 if (!err && t == BPF_READ && value_regno >= 0) 7876 mark_reg_unknown(env, regs, value_regno); 7877 } else if (reg->type == PTR_TO_FLOW_KEYS) { 7878 if (t == BPF_WRITE && value_regno >= 0 && 7879 is_pointer_value(env, value_regno)) { 7880 verbose(env, "R%d leaks addr into flow keys\n", 7881 value_regno); 7882 return -EACCES; 7883 } 7884 7885 err = check_flow_keys_access(env, off, size); 7886 if (!err && t == BPF_READ && value_regno >= 0) 7887 mark_reg_unknown(env, regs, value_regno); 7888 } else if (type_is_sk_pointer(reg->type)) { 7889 if (t == BPF_WRITE) { 7890 verbose(env, "R%d cannot write into %s\n", 7891 regno, reg_type_str(env, reg->type)); 7892 return -EACCES; 7893 } 7894 err = check_sock_access(env, insn_idx, regno, off, size, t); 7895 if (!err && value_regno >= 0) 7896 mark_reg_unknown(env, regs, value_regno); 7897 } else if (reg->type == PTR_TO_TP_BUFFER) { 7898 err = check_tp_buffer_access(env, reg, regno, off, size); 7899 if (!err && t == BPF_READ && value_regno >= 0) 7900 mark_reg_unknown(env, regs, value_regno); 7901 } else if (base_type(reg->type) == PTR_TO_BTF_ID && 7902 !type_may_be_null(reg->type)) { 7903 err = check_ptr_to_btf_access(env, regs, regno, off, size, t, 7904 value_regno); 7905 } else if (reg->type == 
CONST_PTR_TO_MAP) { 7906 err = check_ptr_to_map_access(env, regs, regno, off, size, t, 7907 value_regno); 7908 } else if (base_type(reg->type) == PTR_TO_BUF) { 7909 bool rdonly_mem = type_is_rdonly_mem(reg->type); 7910 u32 *max_access; 7911 7912 if (rdonly_mem) { 7913 if (t == BPF_WRITE) { 7914 verbose(env, "R%d cannot write into %s\n", 7915 regno, reg_type_str(env, reg->type)); 7916 return -EACCES; 7917 } 7918 max_access = &env->prog->aux->max_rdonly_access; 7919 } else { 7920 max_access = &env->prog->aux->max_rdwr_access; 7921 } 7922 7923 err = check_buffer_access(env, reg, regno, off, size, false, 7924 max_access); 7925 7926 if (!err && value_regno >= 0 && (rdonly_mem || t == BPF_READ)) 7927 mark_reg_unknown(env, regs, value_regno); 7928 } else if (reg->type == PTR_TO_ARENA) { 7929 if (t == BPF_READ && value_regno >= 0) 7930 mark_reg_unknown(env, regs, value_regno); 7931 } else { 7932 verbose(env, "R%d invalid mem access '%s'\n", regno, 7933 reg_type_str(env, reg->type)); 7934 return -EACCES; 7935 } 7936 7937 if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ && 7938 regs[value_regno].type == SCALAR_VALUE) { 7939 if (!is_ldsx) 7940 /* b/h/w load zero-extends, mark upper bits as known 0 */ 7941 coerce_reg_to_size(®s[value_regno], size); 7942 else 7943 coerce_reg_to_size_sx(®s[value_regno], size); 7944 } 7945 return err; 7946 } 7947 7948 static int save_aux_ptr_type(struct bpf_verifier_env *env, enum bpf_reg_type type, 7949 bool allow_trust_mismatch); 7950 7951 static int check_load_mem(struct bpf_verifier_env *env, struct bpf_insn *insn, 7952 bool strict_alignment_once, bool is_ldsx, 7953 bool allow_trust_mismatch, const char *ctx) 7954 { 7955 struct bpf_reg_state *regs = cur_regs(env); 7956 enum bpf_reg_type src_reg_type; 7957 int err; 7958 7959 /* check src operand */ 7960 err = check_reg_arg(env, insn->src_reg, SRC_OP); 7961 if (err) 7962 return err; 7963 7964 /* check dst operand */ 7965 err = check_reg_arg(env, insn->dst_reg, 
DST_OP_NO_MARK); 7966 if (err) 7967 return err; 7968 7969 src_reg_type = regs[insn->src_reg].type; 7970 7971 /* Check if (src_reg + off) is readable. The state of dst_reg will be 7972 * updated by this call. 7973 */ 7974 err = check_mem_access(env, env->insn_idx, insn->src_reg, insn->off, 7975 BPF_SIZE(insn->code), BPF_READ, insn->dst_reg, 7976 strict_alignment_once, is_ldsx); 7977 err = err ?: save_aux_ptr_type(env, src_reg_type, 7978 allow_trust_mismatch); 7979 err = err ?: reg_bounds_sanity_check(env, ®s[insn->dst_reg], ctx); 7980 7981 return err; 7982 } 7983 7984 static int check_store_reg(struct bpf_verifier_env *env, struct bpf_insn *insn, 7985 bool strict_alignment_once) 7986 { 7987 struct bpf_reg_state *regs = cur_regs(env); 7988 enum bpf_reg_type dst_reg_type; 7989 int err; 7990 7991 /* check src1 operand */ 7992 err = check_reg_arg(env, insn->src_reg, SRC_OP); 7993 if (err) 7994 return err; 7995 7996 /* check src2 operand */ 7997 err = check_reg_arg(env, insn->dst_reg, SRC_OP); 7998 if (err) 7999 return err; 8000 8001 dst_reg_type = regs[insn->dst_reg].type; 8002 8003 /* Check if (dst_reg + off) is writeable. 
*/ 8004 err = check_mem_access(env, env->insn_idx, insn->dst_reg, insn->off, 8005 BPF_SIZE(insn->code), BPF_WRITE, insn->src_reg, 8006 strict_alignment_once, false); 8007 err = err ?: save_aux_ptr_type(env, dst_reg_type, false); 8008 8009 return err; 8010 } 8011 8012 static int check_atomic_rmw(struct bpf_verifier_env *env, 8013 struct bpf_insn *insn) 8014 { 8015 int load_reg; 8016 int err; 8017 8018 if (BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) { 8019 verbose(env, "invalid atomic operand size\n"); 8020 return -EINVAL; 8021 } 8022 8023 /* check src1 operand */ 8024 err = check_reg_arg(env, insn->src_reg, SRC_OP); 8025 if (err) 8026 return err; 8027 8028 /* check src2 operand */ 8029 err = check_reg_arg(env, insn->dst_reg, SRC_OP); 8030 if (err) 8031 return err; 8032 8033 if (insn->imm == BPF_CMPXCHG) { 8034 /* Check comparison of R0 with memory location */ 8035 const u32 aux_reg = BPF_REG_0; 8036 8037 err = check_reg_arg(env, aux_reg, SRC_OP); 8038 if (err) 8039 return err; 8040 8041 if (is_pointer_value(env, aux_reg)) { 8042 verbose(env, "R%d leaks addr into mem\n", aux_reg); 8043 return -EACCES; 8044 } 8045 } 8046 8047 if (is_pointer_value(env, insn->src_reg)) { 8048 verbose(env, "R%d leaks addr into mem\n", insn->src_reg); 8049 return -EACCES; 8050 } 8051 8052 if (!atomic_ptr_type_ok(env, insn->dst_reg, insn)) { 8053 verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n", 8054 insn->dst_reg, 8055 reg_type_str(env, reg_state(env, insn->dst_reg)->type)); 8056 return -EACCES; 8057 } 8058 8059 if (insn->imm & BPF_FETCH) { 8060 if (insn->imm == BPF_CMPXCHG) 8061 load_reg = BPF_REG_0; 8062 else 8063 load_reg = insn->src_reg; 8064 8065 /* check and record load of old value */ 8066 err = check_reg_arg(env, load_reg, DST_OP); 8067 if (err) 8068 return err; 8069 } else { 8070 /* This instruction accesses a memory location but doesn't 8071 * actually load it into a register. 
8072 */ 8073 load_reg = -1; 8074 } 8075 8076 /* Check whether we can read the memory, with second call for fetch 8077 * case to simulate the register fill. 8078 */ 8079 err = check_mem_access(env, env->insn_idx, insn->dst_reg, insn->off, 8080 BPF_SIZE(insn->code), BPF_READ, -1, true, false); 8081 if (!err && load_reg >= 0) 8082 err = check_mem_access(env, env->insn_idx, insn->dst_reg, 8083 insn->off, BPF_SIZE(insn->code), 8084 BPF_READ, load_reg, true, false); 8085 if (err) 8086 return err; 8087 8088 if (is_arena_reg(env, insn->dst_reg)) { 8089 err = save_aux_ptr_type(env, PTR_TO_ARENA, false); 8090 if (err) 8091 return err; 8092 } 8093 /* Check whether we can write into the same memory. */ 8094 err = check_mem_access(env, env->insn_idx, insn->dst_reg, insn->off, 8095 BPF_SIZE(insn->code), BPF_WRITE, -1, true, false); 8096 if (err) 8097 return err; 8098 return 0; 8099 } 8100 8101 static int check_atomic_load(struct bpf_verifier_env *env, 8102 struct bpf_insn *insn) 8103 { 8104 int err; 8105 8106 err = check_load_mem(env, insn, true, false, false, "atomic_load"); 8107 if (err) 8108 return err; 8109 8110 if (!atomic_ptr_type_ok(env, insn->src_reg, insn)) { 8111 verbose(env, "BPF_ATOMIC loads from R%d %s is not allowed\n", 8112 insn->src_reg, 8113 reg_type_str(env, reg_state(env, insn->src_reg)->type)); 8114 return -EACCES; 8115 } 8116 8117 return 0; 8118 } 8119 8120 static int check_atomic_store(struct bpf_verifier_env *env, 8121 struct bpf_insn *insn) 8122 { 8123 int err; 8124 8125 err = check_store_reg(env, insn, true); 8126 if (err) 8127 return err; 8128 8129 if (!atomic_ptr_type_ok(env, insn->dst_reg, insn)) { 8130 verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n", 8131 insn->dst_reg, 8132 reg_type_str(env, reg_state(env, insn->dst_reg)->type)); 8133 return -EACCES; 8134 } 8135 8136 return 0; 8137 } 8138 8139 static int check_atomic(struct bpf_verifier_env *env, struct bpf_insn *insn) 8140 { 8141 switch (insn->imm) { 8142 case BPF_ADD: 8143 case 
BPF_ADD | BPF_FETCH: 8144 case BPF_AND: 8145 case BPF_AND | BPF_FETCH: 8146 case BPF_OR: 8147 case BPF_OR | BPF_FETCH: 8148 case BPF_XOR: 8149 case BPF_XOR | BPF_FETCH: 8150 case BPF_XCHG: 8151 case BPF_CMPXCHG: 8152 return check_atomic_rmw(env, insn); 8153 case BPF_LOAD_ACQ: 8154 if (BPF_SIZE(insn->code) == BPF_DW && BITS_PER_LONG != 64) { 8155 verbose(env, 8156 "64-bit load-acquires are only supported on 64-bit arches\n"); 8157 return -EOPNOTSUPP; 8158 } 8159 return check_atomic_load(env, insn); 8160 case BPF_STORE_REL: 8161 if (BPF_SIZE(insn->code) == BPF_DW && BITS_PER_LONG != 64) { 8162 verbose(env, 8163 "64-bit store-releases are only supported on 64-bit arches\n"); 8164 return -EOPNOTSUPP; 8165 } 8166 return check_atomic_store(env, insn); 8167 default: 8168 verbose(env, "BPF_ATOMIC uses invalid atomic opcode %02x\n", 8169 insn->imm); 8170 return -EINVAL; 8171 } 8172 } 8173 8174 /* When register 'regno' is used to read the stack (either directly or through 8175 * a helper function) make sure that it's within stack boundary and, depending 8176 * on the access type and privileges, that all elements of the stack are 8177 * initialized. 8178 * 8179 * 'off' includes 'regno->off', but not its dynamic part (if any). 8180 * 8181 * All registers that have been spilled on the stack in the slots within the 8182 * read offsets are marked as read. 8183 */ 8184 static int check_stack_range_initialized( 8185 struct bpf_verifier_env *env, int regno, int off, 8186 int access_size, bool zero_size_allowed, 8187 enum bpf_access_type type, struct bpf_call_arg_meta *meta) 8188 { 8189 struct bpf_reg_state *reg = reg_state(env, regno); 8190 struct bpf_func_state *state = func(env, reg); 8191 int err, min_off, max_off, i, j, slot, spi; 8192 /* Some accesses can write anything into the stack, others are 8193 * read-only. 
8194 */ 8195 bool clobber = false; 8196 8197 if (access_size == 0 && !zero_size_allowed) { 8198 verbose(env, "invalid zero-sized read\n"); 8199 return -EACCES; 8200 } 8201 8202 if (type == BPF_WRITE) 8203 clobber = true; 8204 8205 err = check_stack_access_within_bounds(env, regno, off, access_size, type); 8206 if (err) 8207 return err; 8208 8209 8210 if (tnum_is_const(reg->var_off)) { 8211 min_off = max_off = reg->var_off.value + off; 8212 } else { 8213 /* Variable offset is prohibited for unprivileged mode for 8214 * simplicity since it requires corresponding support in 8215 * Spectre masking for stack ALU. 8216 * See also retrieve_ptr_limit(). 8217 */ 8218 if (!env->bypass_spec_v1) { 8219 char tn_buf[48]; 8220 8221 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); 8222 verbose(env, "R%d variable offset stack access prohibited for !root, var_off=%s\n", 8223 regno, tn_buf); 8224 return -EACCES; 8225 } 8226 /* Only initialized buffer on stack is allowed to be accessed 8227 * with variable offset. With uninitialized buffer it's hard to 8228 * guarantee that whole memory is marked as initialized on 8229 * helper return since specific bounds are unknown what may 8230 * cause uninitialized stack leaking. 8231 */ 8232 if (meta && meta->raw_mode) 8233 meta = NULL; 8234 8235 min_off = reg->smin_value + off; 8236 max_off = reg->smax_value + off; 8237 } 8238 8239 if (meta && meta->raw_mode) { 8240 /* Ensure we won't be overwriting dynptrs when simulating byte 8241 * by byte access in check_helper_call using meta.access_size. 8242 * This would be a problem if we have a helper in the future 8243 * which takes: 8244 * 8245 * helper(uninit_mem, len, dynptr) 8246 * 8247 * Now, uninint_mem may overlap with dynptr pointer. Hence, it 8248 * may end up writing to dynptr itself when touching memory from 8249 * arg 1. This can be relaxed on a case by case basis for known 8250 * safe cases, but reject due to the possibilitiy of aliasing by 8251 * default. 
8252 */ 8253 for (i = min_off; i < max_off + access_size; i++) { 8254 int stack_off = -i - 1; 8255 8256 spi = __get_spi(i); 8257 /* raw_mode may write past allocated_stack */ 8258 if (state->allocated_stack <= stack_off) 8259 continue; 8260 if (state->stack[spi].slot_type[stack_off % BPF_REG_SIZE] == STACK_DYNPTR) { 8261 verbose(env, "potential write to dynptr at off=%d disallowed\n", i); 8262 return -EACCES; 8263 } 8264 } 8265 meta->access_size = access_size; 8266 meta->regno = regno; 8267 return 0; 8268 } 8269 8270 for (i = min_off; i < max_off + access_size; i++) { 8271 u8 *stype; 8272 8273 slot = -i - 1; 8274 spi = slot / BPF_REG_SIZE; 8275 if (state->allocated_stack <= slot) { 8276 verbose(env, "allocated_stack too small\n"); 8277 return -EFAULT; 8278 } 8279 8280 stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE]; 8281 if (*stype == STACK_MISC) 8282 goto mark; 8283 if ((*stype == STACK_ZERO) || 8284 (*stype == STACK_INVALID && env->allow_uninit_stack)) { 8285 if (clobber) { 8286 /* helper can write anything into the stack */ 8287 *stype = STACK_MISC; 8288 } 8289 goto mark; 8290 } 8291 8292 if (is_spilled_reg(&state->stack[spi]) && 8293 (state->stack[spi].spilled_ptr.type == SCALAR_VALUE || 8294 env->allow_ptr_leaks)) { 8295 if (clobber) { 8296 __mark_reg_unknown(env, &state->stack[spi].spilled_ptr); 8297 for (j = 0; j < BPF_REG_SIZE; j++) 8298 scrub_spilled_slot(&state->stack[spi].slot_type[j]); 8299 } 8300 goto mark; 8301 } 8302 8303 if (tnum_is_const(reg->var_off)) { 8304 verbose(env, "invalid read from stack R%d off %d+%d size %d\n", 8305 regno, min_off, i - min_off, access_size); 8306 } else { 8307 char tn_buf[48]; 8308 8309 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); 8310 verbose(env, "invalid read from stack R%d var_off %s+%d size %d\n", 8311 regno, tn_buf, i - min_off, access_size); 8312 } 8313 return -EACCES; 8314 mark: 8315 /* reading any byte out of 8-byte 'spill_slot' will cause 8316 * the whole slot to be marked as 'read' 8317 */ 8318 
err = bpf_mark_stack_read(env, reg->frameno, env->insn_idx, BIT(spi)); 8319 if (err) 8320 return err; 8321 /* We do not call bpf_mark_stack_write(), as we can not 8322 * be sure that whether stack slot is written to or not. Hence, 8323 * we must still conservatively propagate reads upwards even if 8324 * helper may write to the entire memory range. 8325 */ 8326 } 8327 return 0; 8328 } 8329 8330 static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, 8331 int access_size, enum bpf_access_type access_type, 8332 bool zero_size_allowed, 8333 struct bpf_call_arg_meta *meta) 8334 { 8335 struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[regno]; 8336 u32 *max_access; 8337 8338 switch (base_type(reg->type)) { 8339 case PTR_TO_PACKET: 8340 case PTR_TO_PACKET_META: 8341 return check_packet_access(env, regno, reg->off, access_size, 8342 zero_size_allowed); 8343 case PTR_TO_MAP_KEY: 8344 if (access_type == BPF_WRITE) { 8345 verbose(env, "R%d cannot write into %s\n", regno, 8346 reg_type_str(env, reg->type)); 8347 return -EACCES; 8348 } 8349 return check_mem_region_access(env, regno, reg->off, access_size, 8350 reg->map_ptr->key_size, false); 8351 case PTR_TO_MAP_VALUE: 8352 if (check_map_access_type(env, regno, reg->off, access_size, access_type)) 8353 return -EACCES; 8354 return check_map_access(env, regno, reg->off, access_size, 8355 zero_size_allowed, ACCESS_HELPER); 8356 case PTR_TO_MEM: 8357 if (type_is_rdonly_mem(reg->type)) { 8358 if (access_type == BPF_WRITE) { 8359 verbose(env, "R%d cannot write into %s\n", regno, 8360 reg_type_str(env, reg->type)); 8361 return -EACCES; 8362 } 8363 } 8364 return check_mem_region_access(env, regno, reg->off, 8365 access_size, reg->mem_size, 8366 zero_size_allowed); 8367 case PTR_TO_BUF: 8368 if (type_is_rdonly_mem(reg->type)) { 8369 if (access_type == BPF_WRITE) { 8370 verbose(env, "R%d cannot write into %s\n", regno, 8371 reg_type_str(env, reg->type)); 8372 return -EACCES; 8373 } 8374 8375 max_access = 
&env->prog->aux->max_rdonly_access; 8376 } else { 8377 max_access = &env->prog->aux->max_rdwr_access; 8378 } 8379 return check_buffer_access(env, reg, regno, reg->off, 8380 access_size, zero_size_allowed, 8381 max_access); 8382 case PTR_TO_STACK: 8383 return check_stack_range_initialized( 8384 env, 8385 regno, reg->off, access_size, 8386 zero_size_allowed, access_type, meta); 8387 case PTR_TO_BTF_ID: 8388 return check_ptr_to_btf_access(env, regs, regno, reg->off, 8389 access_size, BPF_READ, -1); 8390 case PTR_TO_CTX: 8391 /* in case the function doesn't know how to access the context, 8392 * (because we are in a program of type SYSCALL for example), we 8393 * can not statically check its size. 8394 * Dynamically check it now. 8395 */ 8396 if (!env->ops->convert_ctx_access) { 8397 int offset = access_size - 1; 8398 8399 /* Allow zero-byte read from PTR_TO_CTX */ 8400 if (access_size == 0) 8401 return zero_size_allowed ? 0 : -EACCES; 8402 8403 return check_mem_access(env, env->insn_idx, regno, offset, BPF_B, 8404 access_type, -1, false, false); 8405 } 8406 8407 fallthrough; 8408 default: /* scalar_value or invalid ptr */ 8409 /* Allow zero-byte read from NULL, regardless of pointer type */ 8410 if (zero_size_allowed && access_size == 0 && 8411 register_is_null(reg)) 8412 return 0; 8413 8414 verbose(env, "R%d type=%s ", regno, 8415 reg_type_str(env, reg->type)); 8416 verbose(env, "expected=%s\n", reg_type_str(env, PTR_TO_STACK)); 8417 return -EACCES; 8418 } 8419 } 8420 8421 /* verify arguments to helpers or kfuncs consisting of a pointer and an access 8422 * size. 8423 * 8424 * @regno is the register containing the access size. regno-1 is the register 8425 * containing the pointer. 
8426 */ 8427 static int check_mem_size_reg(struct bpf_verifier_env *env, 8428 struct bpf_reg_state *reg, u32 regno, 8429 enum bpf_access_type access_type, 8430 bool zero_size_allowed, 8431 struct bpf_call_arg_meta *meta) 8432 { 8433 int err; 8434 8435 /* This is used to refine r0 return value bounds for helpers 8436 * that enforce this value as an upper bound on return values. 8437 * See do_refine_retval_range() for helpers that can refine 8438 * the return value. C type of helper is u32 so we pull register 8439 * bound from umax_value however, if negative verifier errors 8440 * out. Only upper bounds can be learned because retval is an 8441 * int type and negative retvals are allowed. 8442 */ 8443 meta->msize_max_value = reg->umax_value; 8444 8445 /* The register is SCALAR_VALUE; the access check happens using 8446 * its boundaries. For unprivileged variable accesses, disable 8447 * raw mode so that the program is required to initialize all 8448 * the memory that the helper could just partially fill up. 
8449 */ 8450 if (!tnum_is_const(reg->var_off)) 8451 meta = NULL; 8452 8453 if (reg->smin_value < 0) { 8454 verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n", 8455 regno); 8456 return -EACCES; 8457 } 8458 8459 if (reg->umin_value == 0 && !zero_size_allowed) { 8460 verbose(env, "R%d invalid zero-sized read: u64=[%lld,%lld]\n", 8461 regno, reg->umin_value, reg->umax_value); 8462 return -EACCES; 8463 } 8464 8465 if (reg->umax_value >= BPF_MAX_VAR_SIZ) { 8466 verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n", 8467 regno); 8468 return -EACCES; 8469 } 8470 err = check_helper_mem_access(env, regno - 1, reg->umax_value, 8471 access_type, zero_size_allowed, meta); 8472 if (!err) 8473 err = mark_chain_precision(env, regno); 8474 return err; 8475 } 8476 8477 static int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, 8478 u32 regno, u32 mem_size) 8479 { 8480 bool may_be_null = type_may_be_null(reg->type); 8481 struct bpf_reg_state saved_reg; 8482 int err; 8483 8484 if (register_is_null(reg)) 8485 return 0; 8486 8487 /* Assuming that the register contains a value check if the memory 8488 * access is safe. Temporarily save and restore the register's state as 8489 * the conversion shouldn't be visible to a caller. 
8490 */ 8491 if (may_be_null) { 8492 saved_reg = *reg; 8493 mark_ptr_not_null_reg(reg); 8494 } 8495 8496 err = check_helper_mem_access(env, regno, mem_size, BPF_READ, true, NULL); 8497 err = err ?: check_helper_mem_access(env, regno, mem_size, BPF_WRITE, true, NULL); 8498 8499 if (may_be_null) 8500 *reg = saved_reg; 8501 8502 return err; 8503 } 8504 8505 static int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, 8506 u32 regno) 8507 { 8508 struct bpf_reg_state *mem_reg = &cur_regs(env)[regno - 1]; 8509 bool may_be_null = type_may_be_null(mem_reg->type); 8510 struct bpf_reg_state saved_reg; 8511 struct bpf_call_arg_meta meta; 8512 int err; 8513 8514 WARN_ON_ONCE(regno < BPF_REG_2 || regno > BPF_REG_5); 8515 8516 memset(&meta, 0, sizeof(meta)); 8517 8518 if (may_be_null) { 8519 saved_reg = *mem_reg; 8520 mark_ptr_not_null_reg(mem_reg); 8521 } 8522 8523 err = check_mem_size_reg(env, reg, regno, BPF_READ, true, &meta); 8524 err = err ?: check_mem_size_reg(env, reg, regno, BPF_WRITE, true, &meta); 8525 8526 if (may_be_null) 8527 *mem_reg = saved_reg; 8528 8529 return err; 8530 } 8531 8532 enum { 8533 PROCESS_SPIN_LOCK = (1 << 0), 8534 PROCESS_RES_LOCK = (1 << 1), 8535 PROCESS_LOCK_IRQ = (1 << 2), 8536 }; 8537 8538 /* Implementation details: 8539 * bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL. 8540 * bpf_obj_new returns PTR_TO_BTF_ID | MEM_ALLOC | PTR_MAYBE_NULL. 8541 * Two bpf_map_lookups (even with the same key) will have different reg->id. 8542 * Two separate bpf_obj_new will also have different reg->id. 8543 * For traditional PTR_TO_MAP_VALUE or PTR_TO_BTF_ID | MEM_ALLOC, the verifier 8544 * clears reg->id after value_or_null->value transition, since the verifier only 8545 * cares about the range of access to valid map value pointer and doesn't care 8546 * about actual address of the map element. 
8547 * For maps with 'struct bpf_spin_lock' inside map value the verifier keeps 8548 * reg->id > 0 after value_or_null->value transition. By doing so 8549 * two bpf_map_lookups will be considered two different pointers that 8550 * point to different bpf_spin_locks. Likewise for pointers to allocated objects 8551 * returned from bpf_obj_new. 8552 * The verifier allows taking only one bpf_spin_lock at a time to avoid 8553 * dead-locks. 8554 * Since only one bpf_spin_lock is allowed the checks are simpler than 8555 * reg_is_refcounted() logic. The verifier needs to remember only 8556 * one spin_lock instead of array of acquired_refs. 8557 * env->cur_state->active_locks remembers which map value element or allocated 8558 * object got locked and clears it after bpf_spin_unlock. 8559 */ 8560 static int process_spin_lock(struct bpf_verifier_env *env, int regno, int flags) 8561 { 8562 bool is_lock = flags & PROCESS_SPIN_LOCK, is_res_lock = flags & PROCESS_RES_LOCK; 8563 const char *lock_str = is_res_lock ? "bpf_res_spin" : "bpf_spin"; 8564 struct bpf_reg_state *reg = reg_state(env, regno); 8565 struct bpf_verifier_state *cur = env->cur_state; 8566 bool is_const = tnum_is_const(reg->var_off); 8567 bool is_irq = flags & PROCESS_LOCK_IRQ; 8568 u64 val = reg->var_off.value; 8569 struct bpf_map *map = NULL; 8570 struct btf *btf = NULL; 8571 struct btf_record *rec; 8572 u32 spin_lock_off; 8573 int err; 8574 8575 if (!is_const) { 8576 verbose(env, 8577 "R%d doesn't have constant offset. %s_lock has to be at the constant offset\n", 8578 regno, lock_str); 8579 return -EINVAL; 8580 } 8581 if (reg->type == PTR_TO_MAP_VALUE) { 8582 map = reg->map_ptr; 8583 if (!map->btf) { 8584 verbose(env, 8585 "map '%s' has to have BTF in order to use %s_lock\n", 8586 map->name, lock_str); 8587 return -EINVAL; 8588 } 8589 } else { 8590 btf = reg->btf; 8591 } 8592 8593 rec = reg_btf_record(reg); 8594 if (!btf_record_has_field(rec, is_res_lock ? 
BPF_RES_SPIN_LOCK : BPF_SPIN_LOCK)) { 8595 verbose(env, "%s '%s' has no valid %s_lock\n", map ? "map" : "local", 8596 map ? map->name : "kptr", lock_str); 8597 return -EINVAL; 8598 } 8599 spin_lock_off = is_res_lock ? rec->res_spin_lock_off : rec->spin_lock_off; 8600 if (spin_lock_off != val + reg->off) { 8601 verbose(env, "off %lld doesn't point to 'struct %s_lock' that is at %d\n", 8602 val + reg->off, lock_str, spin_lock_off); 8603 return -EINVAL; 8604 } 8605 if (is_lock) { 8606 void *ptr; 8607 int type; 8608 8609 if (map) 8610 ptr = map; 8611 else 8612 ptr = btf; 8613 8614 if (!is_res_lock && cur->active_locks) { 8615 if (find_lock_state(env->cur_state, REF_TYPE_LOCK, 0, NULL)) { 8616 verbose(env, 8617 "Locking two bpf_spin_locks are not allowed\n"); 8618 return -EINVAL; 8619 } 8620 } else if (is_res_lock && cur->active_locks) { 8621 if (find_lock_state(env->cur_state, REF_TYPE_RES_LOCK | REF_TYPE_RES_LOCK_IRQ, reg->id, ptr)) { 8622 verbose(env, "Acquiring the same lock again, AA deadlock detected\n"); 8623 return -EINVAL; 8624 } 8625 } 8626 8627 if (is_res_lock && is_irq) 8628 type = REF_TYPE_RES_LOCK_IRQ; 8629 else if (is_res_lock) 8630 type = REF_TYPE_RES_LOCK; 8631 else 8632 type = REF_TYPE_LOCK; 8633 err = acquire_lock_state(env, env->insn_idx, type, reg->id, ptr); 8634 if (err < 0) { 8635 verbose(env, "Failed to acquire lock state\n"); 8636 return err; 8637 } 8638 } else { 8639 void *ptr; 8640 int type; 8641 8642 if (map) 8643 ptr = map; 8644 else 8645 ptr = btf; 8646 8647 if (!cur->active_locks) { 8648 verbose(env, "%s_unlock without taking a lock\n", lock_str); 8649 return -EINVAL; 8650 } 8651 8652 if (is_res_lock && is_irq) 8653 type = REF_TYPE_RES_LOCK_IRQ; 8654 else if (is_res_lock) 8655 type = REF_TYPE_RES_LOCK; 8656 else 8657 type = REF_TYPE_LOCK; 8658 if (!find_lock_state(cur, type, reg->id, ptr)) { 8659 verbose(env, "%s_unlock of different lock\n", lock_str); 8660 return -EINVAL; 8661 } 8662 if (reg->id != cur->active_lock_id || ptr != 
cur->active_lock_ptr) { 8663 verbose(env, "%s_unlock cannot be out of order\n", lock_str); 8664 return -EINVAL; 8665 } 8666 if (release_lock_state(cur, type, reg->id, ptr)) { 8667 verbose(env, "%s_unlock of different lock\n", lock_str); 8668 return -EINVAL; 8669 } 8670 8671 invalidate_non_owning_refs(env); 8672 } 8673 return 0; 8674 } 8675 8676 /* Check if @regno is a pointer to a specific field in a map value */ 8677 static int check_map_field_pointer(struct bpf_verifier_env *env, u32 regno, 8678 enum btf_field_type field_type, 8679 struct bpf_map_desc *map_desc) 8680 { 8681 struct bpf_reg_state *reg = reg_state(env, regno); 8682 bool is_const = tnum_is_const(reg->var_off); 8683 struct bpf_map *map = reg->map_ptr; 8684 u64 val = reg->var_off.value; 8685 const char *struct_name = btf_field_type_name(field_type); 8686 int field_off = -1; 8687 8688 if (!is_const) { 8689 verbose(env, 8690 "R%d doesn't have constant offset. %s has to be at the constant offset\n", 8691 regno, struct_name); 8692 return -EINVAL; 8693 } 8694 if (!map->btf) { 8695 verbose(env, "map '%s' has to have BTF in order to use %s\n", map->name, 8696 struct_name); 8697 return -EINVAL; 8698 } 8699 if (!btf_record_has_field(map->record, field_type)) { 8700 verbose(env, "map '%s' has no valid %s\n", map->name, struct_name); 8701 return -EINVAL; 8702 } 8703 switch (field_type) { 8704 case BPF_TIMER: 8705 field_off = map->record->timer_off; 8706 break; 8707 case BPF_TASK_WORK: 8708 field_off = map->record->task_work_off; 8709 break; 8710 case BPF_WORKQUEUE: 8711 field_off = map->record->wq_off; 8712 break; 8713 default: 8714 verifier_bug(env, "unsupported BTF field type: %s\n", struct_name); 8715 return -EINVAL; 8716 } 8717 if (field_off != val + reg->off) { 8718 verbose(env, "off %lld doesn't point to 'struct %s' that is at %d\n", 8719 val + reg->off, struct_name, field_off); 8720 return -EINVAL; 8721 } 8722 if (map_desc->ptr) { 8723 verifier_bug(env, "Two map pointers in a %s helper", struct_name); 8724 
return -EFAULT; 8725 } 8726 map_desc->uid = reg->map_uid; 8727 map_desc->ptr = map; 8728 return 0; 8729 } 8730 8731 static int process_timer_func(struct bpf_verifier_env *env, int regno, 8732 struct bpf_map_desc *map) 8733 { 8734 if (IS_ENABLED(CONFIG_PREEMPT_RT)) { 8735 verbose(env, "bpf_timer cannot be used for PREEMPT_RT.\n"); 8736 return -EOPNOTSUPP; 8737 } 8738 return check_map_field_pointer(env, regno, BPF_TIMER, map); 8739 } 8740 8741 static int process_timer_helper(struct bpf_verifier_env *env, int regno, 8742 struct bpf_call_arg_meta *meta) 8743 { 8744 return process_timer_func(env, regno, &meta->map); 8745 } 8746 8747 static int process_timer_kfunc(struct bpf_verifier_env *env, int regno, 8748 struct bpf_kfunc_call_arg_meta *meta) 8749 { 8750 return process_timer_func(env, regno, &meta->map); 8751 } 8752 8753 static int process_kptr_func(struct bpf_verifier_env *env, int regno, 8754 struct bpf_call_arg_meta *meta) 8755 { 8756 struct bpf_reg_state *reg = reg_state(env, regno); 8757 struct btf_field *kptr_field; 8758 struct bpf_map *map_ptr; 8759 struct btf_record *rec; 8760 u32 kptr_off; 8761 8762 if (type_is_ptr_alloc_obj(reg->type)) { 8763 rec = reg_btf_record(reg); 8764 } else { /* PTR_TO_MAP_VALUE */ 8765 map_ptr = reg->map_ptr; 8766 if (!map_ptr->btf) { 8767 verbose(env, "map '%s' has to have BTF in order to use bpf_kptr_xchg\n", 8768 map_ptr->name); 8769 return -EINVAL; 8770 } 8771 rec = map_ptr->record; 8772 meta->map.ptr = map_ptr; 8773 } 8774 8775 if (!tnum_is_const(reg->var_off)) { 8776 verbose(env, 8777 "R%d doesn't have constant offset. 
kptr has to be at the constant offset\n", 8778 regno); 8779 return -EINVAL; 8780 } 8781 8782 if (!btf_record_has_field(rec, BPF_KPTR)) { 8783 verbose(env, "R%d has no valid kptr\n", regno); 8784 return -EINVAL; 8785 } 8786 8787 kptr_off = reg->off + reg->var_off.value; 8788 kptr_field = btf_record_find(rec, kptr_off, BPF_KPTR); 8789 if (!kptr_field) { 8790 verbose(env, "off=%d doesn't point to kptr\n", kptr_off); 8791 return -EACCES; 8792 } 8793 if (kptr_field->type != BPF_KPTR_REF && kptr_field->type != BPF_KPTR_PERCPU) { 8794 verbose(env, "off=%d kptr isn't referenced kptr\n", kptr_off); 8795 return -EACCES; 8796 } 8797 meta->kptr_field = kptr_field; 8798 return 0; 8799 } 8800 8801 /* There are two register types representing a bpf_dynptr, one is PTR_TO_STACK 8802 * which points to a stack slot, and the other is CONST_PTR_TO_DYNPTR. 8803 * 8804 * In both cases we deal with the first 8 bytes, but need to mark the next 8 8805 * bytes as STACK_DYNPTR in case of PTR_TO_STACK. In case of 8806 * CONST_PTR_TO_DYNPTR, we are guaranteed to get the beginning of the object. 8807 * 8808 * Mutability of bpf_dynptr is at two levels, one is at the level of struct 8809 * bpf_dynptr itself, i.e. whether the helper is receiving a pointer to struct 8810 * bpf_dynptr or pointer to const struct bpf_dynptr. In the former case, it can 8811 * mutate the view of the dynptr and also possibly destroy it. In the latter 8812 * case, it cannot mutate the bpf_dynptr itself but it can still mutate the 8813 * memory that dynptr points to. 8814 * 8815 * The verifier will keep track both levels of mutation (bpf_dynptr's in 8816 * reg->type and the memory's in reg->dynptr.type), but there is no support for 8817 * readonly dynptr view yet, hence only the first case is tracked and checked. 8818 * 8819 * This is consistent with how C applies the const modifier to a struct object, 8820 * where the pointer itself inside bpf_dynptr becomes const but not what it 8821 * points to. 
8822 * 8823 * Helpers which do not mutate the bpf_dynptr set MEM_RDONLY in their argument 8824 * type, and declare it as 'const struct bpf_dynptr *' in their prototype. 8825 */ 8826 static int process_dynptr_func(struct bpf_verifier_env *env, int regno, int insn_idx, 8827 enum bpf_arg_type arg_type, int clone_ref_obj_id) 8828 { 8829 struct bpf_reg_state *reg = reg_state(env, regno); 8830 int err; 8831 8832 if (reg->type != PTR_TO_STACK && reg->type != CONST_PTR_TO_DYNPTR) { 8833 verbose(env, 8834 "arg#%d expected pointer to stack or const struct bpf_dynptr\n", 8835 regno - 1); 8836 return -EINVAL; 8837 } 8838 8839 /* MEM_UNINIT and MEM_RDONLY are exclusive, when applied to an 8840 * ARG_PTR_TO_DYNPTR (or ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_*): 8841 */ 8842 if ((arg_type & (MEM_UNINIT | MEM_RDONLY)) == (MEM_UNINIT | MEM_RDONLY)) { 8843 verifier_bug(env, "misconfigured dynptr helper type flags"); 8844 return -EFAULT; 8845 } 8846 8847 /* MEM_UNINIT - Points to memory that is an appropriate candidate for 8848 * constructing a mutable bpf_dynptr object. 8849 * 8850 * Currently, this is only possible with PTR_TO_STACK 8851 * pointing to a region of at least 16 bytes which doesn't 8852 * contain an existing bpf_dynptr. 8853 * 8854 * MEM_RDONLY - Points to a initialized bpf_dynptr that will not be 8855 * mutated or destroyed. However, the memory it points to 8856 * may be mutated. 8857 * 8858 * None - Points to a initialized dynptr that can be mutated and 8859 * destroyed, including mutation of the memory it points 8860 * to. 
8861 */ 8862 if (arg_type & MEM_UNINIT) { 8863 int i; 8864 8865 if (!is_dynptr_reg_valid_uninit(env, reg)) { 8866 verbose(env, "Dynptr has to be an uninitialized dynptr\n"); 8867 return -EINVAL; 8868 } 8869 8870 /* we write BPF_DW bits (8 bytes) at a time */ 8871 for (i = 0; i < BPF_DYNPTR_SIZE; i += 8) { 8872 err = check_mem_access(env, insn_idx, regno, 8873 i, BPF_DW, BPF_WRITE, -1, false, false); 8874 if (err) 8875 return err; 8876 } 8877 8878 err = mark_stack_slots_dynptr(env, reg, arg_type, insn_idx, clone_ref_obj_id); 8879 } else /* MEM_RDONLY and None case from above */ { 8880 /* For the reg->type == PTR_TO_STACK case, bpf_dynptr is never const */ 8881 if (reg->type == CONST_PTR_TO_DYNPTR && !(arg_type & MEM_RDONLY)) { 8882 verbose(env, "cannot pass pointer to const bpf_dynptr, the helper mutates it\n"); 8883 return -EINVAL; 8884 } 8885 8886 if (!is_dynptr_reg_valid_init(env, reg)) { 8887 verbose(env, 8888 "Expected an initialized dynptr as arg #%d\n", 8889 regno - 1); 8890 return -EINVAL; 8891 } 8892 8893 /* Fold modifiers (in this case, MEM_RDONLY) when checking expected type */ 8894 if (!is_dynptr_type_expected(env, reg, arg_type & ~MEM_RDONLY)) { 8895 verbose(env, 8896 "Expected a dynptr of type %s as arg #%d\n", 8897 dynptr_type_str(arg_to_dynptr_type(arg_type)), regno - 1); 8898 return -EINVAL; 8899 } 8900 8901 err = mark_dynptr_read(env, reg); 8902 } 8903 return err; 8904 } 8905 8906 static u32 iter_ref_obj_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg, int spi) 8907 { 8908 struct bpf_func_state *state = func(env, reg); 8909 8910 return state->stack[spi].spilled_ptr.ref_obj_id; 8911 } 8912 8913 static bool is_iter_kfunc(struct bpf_kfunc_call_arg_meta *meta) 8914 { 8915 return meta->kfunc_flags & (KF_ITER_NEW | KF_ITER_NEXT | KF_ITER_DESTROY); 8916 } 8917 8918 static bool is_iter_new_kfunc(struct bpf_kfunc_call_arg_meta *meta) 8919 { 8920 return meta->kfunc_flags & KF_ITER_NEW; 8921 } 8922 8923 static bool is_iter_next_kfunc(struct 
bpf_kfunc_call_arg_meta *meta) 8924 { 8925 return meta->kfunc_flags & KF_ITER_NEXT; 8926 } 8927 8928 static bool is_iter_destroy_kfunc(struct bpf_kfunc_call_arg_meta *meta) 8929 { 8930 return meta->kfunc_flags & KF_ITER_DESTROY; 8931 } 8932 8933 static bool is_kfunc_arg_iter(struct bpf_kfunc_call_arg_meta *meta, int arg_idx, 8934 const struct btf_param *arg) 8935 { 8936 /* btf_check_iter_kfuncs() guarantees that first argument of any iter 8937 * kfunc is iter state pointer 8938 */ 8939 if (is_iter_kfunc(meta)) 8940 return arg_idx == 0; 8941 8942 /* iter passed as an argument to a generic kfunc */ 8943 return btf_param_match_suffix(meta->btf, arg, "__iter"); 8944 } 8945 8946 static int process_iter_arg(struct bpf_verifier_env *env, int regno, int insn_idx, 8947 struct bpf_kfunc_call_arg_meta *meta) 8948 { 8949 struct bpf_reg_state *reg = reg_state(env, regno); 8950 const struct btf_type *t; 8951 int spi, err, i, nr_slots, btf_id; 8952 8953 if (reg->type != PTR_TO_STACK) { 8954 verbose(env, "arg#%d expected pointer to an iterator on stack\n", regno - 1); 8955 return -EINVAL; 8956 } 8957 8958 /* For iter_{new,next,destroy} functions, btf_check_iter_kfuncs() 8959 * ensures struct convention, so we wouldn't need to do any BTF 8960 * validation here. But given iter state can be passed as a parameter 8961 * to any kfunc, if arg has "__iter" suffix, we need to be a bit more 8962 * conservative here. 
8963 */ 8964 btf_id = btf_check_iter_arg(meta->btf, meta->func_proto, regno - 1); 8965 if (btf_id < 0) { 8966 verbose(env, "expected valid iter pointer as arg #%d\n", regno - 1); 8967 return -EINVAL; 8968 } 8969 t = btf_type_by_id(meta->btf, btf_id); 8970 nr_slots = t->size / BPF_REG_SIZE; 8971 8972 if (is_iter_new_kfunc(meta)) { 8973 /* bpf_iter_<type>_new() expects pointer to uninit iter state */ 8974 if (!is_iter_reg_valid_uninit(env, reg, nr_slots)) { 8975 verbose(env, "expected uninitialized iter_%s as arg #%d\n", 8976 iter_type_str(meta->btf, btf_id), regno - 1); 8977 return -EINVAL; 8978 } 8979 8980 for (i = 0; i < nr_slots * 8; i += BPF_REG_SIZE) { 8981 err = check_mem_access(env, insn_idx, regno, 8982 i, BPF_DW, BPF_WRITE, -1, false, false); 8983 if (err) 8984 return err; 8985 } 8986 8987 err = mark_stack_slots_iter(env, meta, reg, insn_idx, meta->btf, btf_id, nr_slots); 8988 if (err) 8989 return err; 8990 } else { 8991 /* iter_next() or iter_destroy(), as well as any kfunc 8992 * accepting iter argument, expect initialized iter state 8993 */ 8994 err = is_iter_reg_valid_init(env, reg, meta->btf, btf_id, nr_slots); 8995 switch (err) { 8996 case 0: 8997 break; 8998 case -EINVAL: 8999 verbose(env, "expected an initialized iter_%s as arg #%d\n", 9000 iter_type_str(meta->btf, btf_id), regno - 1); 9001 return err; 9002 case -EPROTO: 9003 verbose(env, "expected an RCU CS when using %s\n", meta->func_name); 9004 return err; 9005 default: 9006 return err; 9007 } 9008 9009 spi = iter_get_spi(env, reg, nr_slots); 9010 if (spi < 0) 9011 return spi; 9012 9013 err = mark_iter_read(env, reg, spi, nr_slots); 9014 if (err) 9015 return err; 9016 9017 /* remember meta->iter info for process_iter_next_call() */ 9018 meta->iter.spi = spi; 9019 meta->iter.frameno = reg->frameno; 9020 meta->ref_obj_id = iter_ref_obj_id(env, reg, spi); 9021 9022 if (is_iter_destroy_kfunc(meta)) { 9023 err = unmark_stack_slots_iter(env, reg, nr_slots); 9024 if (err) 9025 return err; 9026 } 9027 } 
9028 9029 return 0; 9030 } 9031 9032 /* Look for a previous loop entry at insn_idx: nearest parent state 9033 * stopped at insn_idx with callsites matching those in cur->frame. 9034 */ 9035 static struct bpf_verifier_state *find_prev_entry(struct bpf_verifier_env *env, 9036 struct bpf_verifier_state *cur, 9037 int insn_idx) 9038 { 9039 struct bpf_verifier_state_list *sl; 9040 struct bpf_verifier_state *st; 9041 struct list_head *pos, *head; 9042 9043 /* Explored states are pushed in stack order, most recent states come first */ 9044 head = explored_state(env, insn_idx); 9045 list_for_each(pos, head) { 9046 sl = container_of(pos, struct bpf_verifier_state_list, node); 9047 /* If st->branches != 0 state is a part of current DFS verification path, 9048 * hence cur & st for a loop. 9049 */ 9050 st = &sl->state; 9051 if (st->insn_idx == insn_idx && st->branches && same_callsites(st, cur) && 9052 st->dfs_depth < cur->dfs_depth) 9053 return st; 9054 } 9055 9056 return NULL; 9057 } 9058 9059 static void reset_idmap_scratch(struct bpf_verifier_env *env); 9060 static bool regs_exact(const struct bpf_reg_state *rold, 9061 const struct bpf_reg_state *rcur, 9062 struct bpf_idmap *idmap); 9063 9064 /* 9065 * Check if scalar registers are exact for the purpose of not widening. 
9066 * More lenient than regs_exact() 9067 */ 9068 static bool scalars_exact_for_widen(const struct bpf_reg_state *rold, 9069 const struct bpf_reg_state *rcur) 9070 { 9071 return !memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)); 9072 } 9073 9074 static void maybe_widen_reg(struct bpf_verifier_env *env, 9075 struct bpf_reg_state *rold, struct bpf_reg_state *rcur) 9076 { 9077 if (rold->type != SCALAR_VALUE) 9078 return; 9079 if (rold->type != rcur->type) 9080 return; 9081 if (rold->precise || rcur->precise || scalars_exact_for_widen(rold, rcur)) 9082 return; 9083 __mark_reg_unknown(env, rcur); 9084 } 9085 9086 static int widen_imprecise_scalars(struct bpf_verifier_env *env, 9087 struct bpf_verifier_state *old, 9088 struct bpf_verifier_state *cur) 9089 { 9090 struct bpf_func_state *fold, *fcur; 9091 int i, fr, num_slots; 9092 9093 for (fr = old->curframe; fr >= 0; fr--) { 9094 fold = old->frame[fr]; 9095 fcur = cur->frame[fr]; 9096 9097 for (i = 0; i < MAX_BPF_REG; i++) 9098 maybe_widen_reg(env, 9099 &fold->regs[i], 9100 &fcur->regs[i]); 9101 9102 num_slots = min(fold->allocated_stack / BPF_REG_SIZE, 9103 fcur->allocated_stack / BPF_REG_SIZE); 9104 for (i = 0; i < num_slots; i++) { 9105 if (!is_spilled_reg(&fold->stack[i]) || 9106 !is_spilled_reg(&fcur->stack[i])) 9107 continue; 9108 9109 maybe_widen_reg(env, 9110 &fold->stack[i].spilled_ptr, 9111 &fcur->stack[i].spilled_ptr); 9112 } 9113 } 9114 return 0; 9115 } 9116 9117 static struct bpf_reg_state *get_iter_from_state(struct bpf_verifier_state *cur_st, 9118 struct bpf_kfunc_call_arg_meta *meta) 9119 { 9120 int iter_frameno = meta->iter.frameno; 9121 int iter_spi = meta->iter.spi; 9122 9123 return &cur_st->frame[iter_frameno]->stack[iter_spi].spilled_ptr; 9124 } 9125 9126 /* process_iter_next_call() is called when verifier gets to iterator's next 9127 * "method" (e.g., bpf_iter_num_next() for numbers iterator) call. We'll refer 9128 * to it as just "iter_next()" in comments below. 
*
 * BPF verifier relies on a crucial contract for any iter_next()
 * implementation: it should *eventually* return NULL, and once that happens
 * it should keep returning NULL. That is, once iterator exhausts elements to
 * iterate, it should never reset or spuriously return new elements.
 *
 * With the assumption of such contract, process_iter_next_call() simulates
 * a fork in the verifier state to validate loop logic correctness and safety
 * without having to simulate infinite amount of iterations.
 *
 * In current state, we first assume that iter_next() returned NULL and
 * iterator state is set to DRAINED (BPF_ITER_STATE_DRAINED). In such
 * conditions we should not form an infinite loop and should eventually reach
 * exit.
 *
 * Besides that, we also fork current state and enqueue it for later
 * verification. In a forked state we keep iterator state as ACTIVE
 * (BPF_ITER_STATE_ACTIVE) and assume non-NULL return from iter_next(). We
 * also bump iteration depth to prevent erroneous infinite loop detection
 * later on (see iter_active_depths_differ() comment for details). In this
 * state we assume that we'll eventually loop back to another iter_next()
 * call (it could be in exactly same location or in some other instruction,
 * it doesn't matter, we don't make any unnecessary assumptions about this,
 * everything revolves around iterator state in a stack slot, not which
 * instruction is calling iter_next()). When that happens, we either will come
 * to iter_next() with equivalent state and can conclude that next iteration
 * will proceed in exactly the same way as we just verified, so it's safe to
 * assume that loop converges. If not, we'll go on another iteration
 * simulation with a different input state, until all possible starting states
 * are validated or we reach maximum number of instructions limit.
*
 * This way, we will either exhaustively discover all possible input states
 * that iterator loop can start with and eventually will converge, or we'll
 * effectively regress into bounded loop simulation logic and either reach
 * maximum number of instructions if loop is not provably convergent, or there
 * is some statically known limit on number of iterations (e.g., if there is
 * an explicit `if n > 100 then break;` statement somewhere in the loop).
 *
 * Iteration convergence logic in is_state_visited() relies on exact
 * states comparison, which ignores read and precision marks.
 * This is necessary because read and precision marks are not finalized
 * while in the loop. Exact comparison might preclude convergence for
 * simple programs like below:
 *
 *     i = 0;
 *     while(iter_next(&it))
 *       i++;
 *
 * At each iteration step i++ would produce a new distinct state and
 * eventually instruction processing limit would be reached.
 *
 * To avoid such behavior speculatively forget (widen) range for
 * imprecise scalar registers, if those registers were not precise at the
 * end of the previous iteration and do not match exactly.
 *
 * This is a conservative heuristic that allows verifying a wide range of
 * programs, however it precludes verification of programs that conjure an
 * imprecise value on the first loop iteration and use it as precise on a second.
 * For example, the following safe program would fail to verify:
 *
 *     struct bpf_num_iter it;
 *     int arr[10];
 *     int i = 0, a = 0;
 *     bpf_iter_num_new(&it, 0, 10);
 *     while (bpf_iter_num_next(&it)) {
 *       if (a == 0) {
 *         a = 1;
 *         i = 7; // Because i changed verifier would forget
 *                // its range on second loop entry.
 *       } else {
 *         arr[i] = 42; // This would fail to verify.
9200 * } 9201 * } 9202 * bpf_iter_num_destroy(&it); 9203 */ 9204 static int process_iter_next_call(struct bpf_verifier_env *env, int insn_idx, 9205 struct bpf_kfunc_call_arg_meta *meta) 9206 { 9207 struct bpf_verifier_state *cur_st = env->cur_state, *queued_st, *prev_st; 9208 struct bpf_func_state *cur_fr = cur_st->frame[cur_st->curframe], *queued_fr; 9209 struct bpf_reg_state *cur_iter, *queued_iter; 9210 9211 BTF_TYPE_EMIT(struct bpf_iter); 9212 9213 cur_iter = get_iter_from_state(cur_st, meta); 9214 9215 if (cur_iter->iter.state != BPF_ITER_STATE_ACTIVE && 9216 cur_iter->iter.state != BPF_ITER_STATE_DRAINED) { 9217 verifier_bug(env, "unexpected iterator state %d (%s)", 9218 cur_iter->iter.state, iter_state_str(cur_iter->iter.state)); 9219 return -EFAULT; 9220 } 9221 9222 if (cur_iter->iter.state == BPF_ITER_STATE_ACTIVE) { 9223 /* Because iter_next() call is a checkpoint is_state_visitied() 9224 * should guarantee parent state with same call sites and insn_idx. 9225 */ 9226 if (!cur_st->parent || cur_st->parent->insn_idx != insn_idx || 9227 !same_callsites(cur_st->parent, cur_st)) { 9228 verifier_bug(env, "bad parent state for iter next call"); 9229 return -EFAULT; 9230 } 9231 /* Note cur_st->parent in the call below, it is necessary to skip 9232 * checkpoint created for cur_st by is_state_visited() 9233 * right at this instruction. 
9234 */ 9235 prev_st = find_prev_entry(env, cur_st->parent, insn_idx); 9236 /* branch out active iter state */ 9237 queued_st = push_stack(env, insn_idx + 1, insn_idx, false); 9238 if (IS_ERR(queued_st)) 9239 return PTR_ERR(queued_st); 9240 9241 queued_iter = get_iter_from_state(queued_st, meta); 9242 queued_iter->iter.state = BPF_ITER_STATE_ACTIVE; 9243 queued_iter->iter.depth++; 9244 if (prev_st) 9245 widen_imprecise_scalars(env, prev_st, queued_st); 9246 9247 queued_fr = queued_st->frame[queued_st->curframe]; 9248 mark_ptr_not_null_reg(&queued_fr->regs[BPF_REG_0]); 9249 } 9250 9251 /* switch to DRAINED state, but keep the depth unchanged */ 9252 /* mark current iter state as drained and assume returned NULL */ 9253 cur_iter->iter.state = BPF_ITER_STATE_DRAINED; 9254 __mark_reg_const_zero(env, &cur_fr->regs[BPF_REG_0]); 9255 9256 return 0; 9257 } 9258 9259 static bool arg_type_is_mem_size(enum bpf_arg_type type) 9260 { 9261 return type == ARG_CONST_SIZE || 9262 type == ARG_CONST_SIZE_OR_ZERO; 9263 } 9264 9265 static bool arg_type_is_raw_mem(enum bpf_arg_type type) 9266 { 9267 return base_type(type) == ARG_PTR_TO_MEM && 9268 type & MEM_UNINIT; 9269 } 9270 9271 static bool arg_type_is_release(enum bpf_arg_type type) 9272 { 9273 return type & OBJ_RELEASE; 9274 } 9275 9276 static bool arg_type_is_dynptr(enum bpf_arg_type type) 9277 { 9278 return base_type(type) == ARG_PTR_TO_DYNPTR; 9279 } 9280 9281 static int resolve_map_arg_type(struct bpf_verifier_env *env, 9282 const struct bpf_call_arg_meta *meta, 9283 enum bpf_arg_type *arg_type) 9284 { 9285 if (!meta->map.ptr) { 9286 /* kernel subsystem misconfigured verifier */ 9287 verifier_bug(env, "invalid map_ptr to access map->type"); 9288 return -EFAULT; 9289 } 9290 9291 switch (meta->map.ptr->map_type) { 9292 case BPF_MAP_TYPE_SOCKMAP: 9293 case BPF_MAP_TYPE_SOCKHASH: 9294 if (*arg_type == ARG_PTR_TO_MAP_VALUE) { 9295 *arg_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON; 9296 } else { 9297 verbose(env, "invalid arg_type for 
sockmap/sockhash\n"); 9298 return -EINVAL; 9299 } 9300 break; 9301 case BPF_MAP_TYPE_BLOOM_FILTER: 9302 if (meta->func_id == BPF_FUNC_map_peek_elem) 9303 *arg_type = ARG_PTR_TO_MAP_VALUE; 9304 break; 9305 default: 9306 break; 9307 } 9308 return 0; 9309 } 9310 9311 struct bpf_reg_types { 9312 const enum bpf_reg_type types[10]; 9313 u32 *btf_id; 9314 }; 9315 9316 static const struct bpf_reg_types sock_types = { 9317 .types = { 9318 PTR_TO_SOCK_COMMON, 9319 PTR_TO_SOCKET, 9320 PTR_TO_TCP_SOCK, 9321 PTR_TO_XDP_SOCK, 9322 }, 9323 }; 9324 9325 #ifdef CONFIG_NET 9326 static const struct bpf_reg_types btf_id_sock_common_types = { 9327 .types = { 9328 PTR_TO_SOCK_COMMON, 9329 PTR_TO_SOCKET, 9330 PTR_TO_TCP_SOCK, 9331 PTR_TO_XDP_SOCK, 9332 PTR_TO_BTF_ID, 9333 PTR_TO_BTF_ID | PTR_TRUSTED, 9334 }, 9335 .btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON], 9336 }; 9337 #endif 9338 9339 static const struct bpf_reg_types mem_types = { 9340 .types = { 9341 PTR_TO_STACK, 9342 PTR_TO_PACKET, 9343 PTR_TO_PACKET_META, 9344 PTR_TO_MAP_KEY, 9345 PTR_TO_MAP_VALUE, 9346 PTR_TO_MEM, 9347 PTR_TO_MEM | MEM_RINGBUF, 9348 PTR_TO_BUF, 9349 PTR_TO_BTF_ID | PTR_TRUSTED, 9350 }, 9351 }; 9352 9353 static const struct bpf_reg_types spin_lock_types = { 9354 .types = { 9355 PTR_TO_MAP_VALUE, 9356 PTR_TO_BTF_ID | MEM_ALLOC, 9357 } 9358 }; 9359 9360 static const struct bpf_reg_types fullsock_types = { .types = { PTR_TO_SOCKET } }; 9361 static const struct bpf_reg_types scalar_types = { .types = { SCALAR_VALUE } }; 9362 static const struct bpf_reg_types context_types = { .types = { PTR_TO_CTX } }; 9363 static const struct bpf_reg_types ringbuf_mem_types = { .types = { PTR_TO_MEM | MEM_RINGBUF } }; 9364 static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_TO_MAP } }; 9365 static const struct bpf_reg_types btf_ptr_types = { 9366 .types = { 9367 PTR_TO_BTF_ID, 9368 PTR_TO_BTF_ID | PTR_TRUSTED, 9369 PTR_TO_BTF_ID | MEM_RCU, 9370 }, 9371 }; 9372 static const struct bpf_reg_types 
percpu_btf_ptr_types = { 9373 .types = { 9374 PTR_TO_BTF_ID | MEM_PERCPU, 9375 PTR_TO_BTF_ID | MEM_PERCPU | MEM_RCU, 9376 PTR_TO_BTF_ID | MEM_PERCPU | PTR_TRUSTED, 9377 } 9378 }; 9379 static const struct bpf_reg_types func_ptr_types = { .types = { PTR_TO_FUNC } }; 9380 static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } }; 9381 static const struct bpf_reg_types const_str_ptr_types = { .types = { PTR_TO_MAP_VALUE } }; 9382 static const struct bpf_reg_types timer_types = { .types = { PTR_TO_MAP_VALUE } }; 9383 static const struct bpf_reg_types kptr_xchg_dest_types = { 9384 .types = { 9385 PTR_TO_MAP_VALUE, 9386 PTR_TO_BTF_ID | MEM_ALLOC 9387 } 9388 }; 9389 static const struct bpf_reg_types dynptr_types = { 9390 .types = { 9391 PTR_TO_STACK, 9392 CONST_PTR_TO_DYNPTR, 9393 } 9394 }; 9395 9396 static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = { 9397 [ARG_PTR_TO_MAP_KEY] = &mem_types, 9398 [ARG_PTR_TO_MAP_VALUE] = &mem_types, 9399 [ARG_CONST_SIZE] = &scalar_types, 9400 [ARG_CONST_SIZE_OR_ZERO] = &scalar_types, 9401 [ARG_CONST_ALLOC_SIZE_OR_ZERO] = &scalar_types, 9402 [ARG_CONST_MAP_PTR] = &const_map_ptr_types, 9403 [ARG_PTR_TO_CTX] = &context_types, 9404 [ARG_PTR_TO_SOCK_COMMON] = &sock_types, 9405 #ifdef CONFIG_NET 9406 [ARG_PTR_TO_BTF_ID_SOCK_COMMON] = &btf_id_sock_common_types, 9407 #endif 9408 [ARG_PTR_TO_SOCKET] = &fullsock_types, 9409 [ARG_PTR_TO_BTF_ID] = &btf_ptr_types, 9410 [ARG_PTR_TO_SPIN_LOCK] = &spin_lock_types, 9411 [ARG_PTR_TO_MEM] = &mem_types, 9412 [ARG_PTR_TO_RINGBUF_MEM] = &ringbuf_mem_types, 9413 [ARG_PTR_TO_PERCPU_BTF_ID] = &percpu_btf_ptr_types, 9414 [ARG_PTR_TO_FUNC] = &func_ptr_types, 9415 [ARG_PTR_TO_STACK] = &stack_ptr_types, 9416 [ARG_PTR_TO_CONST_STR] = &const_str_ptr_types, 9417 [ARG_PTR_TO_TIMER] = &timer_types, 9418 [ARG_KPTR_XCHG_DEST] = &kptr_xchg_dest_types, 9419 [ARG_PTR_TO_DYNPTR] = &dynptr_types, 9420 }; 9421 9422 static int check_reg_type(struct bpf_verifier_env *env, u32 regno, 
9423 enum bpf_arg_type arg_type, 9424 const u32 *arg_btf_id, 9425 struct bpf_call_arg_meta *meta) 9426 { 9427 struct bpf_reg_state *reg = reg_state(env, regno); 9428 enum bpf_reg_type expected, type = reg->type; 9429 const struct bpf_reg_types *compatible; 9430 int i, j; 9431 9432 compatible = compatible_reg_types[base_type(arg_type)]; 9433 if (!compatible) { 9434 verifier_bug(env, "unsupported arg type %d", arg_type); 9435 return -EFAULT; 9436 } 9437 9438 /* ARG_PTR_TO_MEM + RDONLY is compatible with PTR_TO_MEM and PTR_TO_MEM + RDONLY, 9439 * but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM and NOT with PTR_TO_MEM + RDONLY 9440 * 9441 * Same for MAYBE_NULL: 9442 * 9443 * ARG_PTR_TO_MEM + MAYBE_NULL is compatible with PTR_TO_MEM and PTR_TO_MEM + MAYBE_NULL, 9444 * but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM but NOT with PTR_TO_MEM + MAYBE_NULL 9445 * 9446 * ARG_PTR_TO_MEM is compatible with PTR_TO_MEM that is tagged with a dynptr type. 9447 * 9448 * Therefore we fold these flags depending on the arg_type before comparison. 
9449 */ 9450 if (arg_type & MEM_RDONLY) 9451 type &= ~MEM_RDONLY; 9452 if (arg_type & PTR_MAYBE_NULL) 9453 type &= ~PTR_MAYBE_NULL; 9454 if (base_type(arg_type) == ARG_PTR_TO_MEM) 9455 type &= ~DYNPTR_TYPE_FLAG_MASK; 9456 9457 /* Local kptr types are allowed as the source argument of bpf_kptr_xchg */ 9458 if (meta->func_id == BPF_FUNC_kptr_xchg && type_is_alloc(type) && regno == BPF_REG_2) { 9459 type &= ~MEM_ALLOC; 9460 type &= ~MEM_PERCPU; 9461 } 9462 9463 for (i = 0; i < ARRAY_SIZE(compatible->types); i++) { 9464 expected = compatible->types[i]; 9465 if (expected == NOT_INIT) 9466 break; 9467 9468 if (type == expected) 9469 goto found; 9470 } 9471 9472 verbose(env, "R%d type=%s expected=", regno, reg_type_str(env, reg->type)); 9473 for (j = 0; j + 1 < i; j++) 9474 verbose(env, "%s, ", reg_type_str(env, compatible->types[j])); 9475 verbose(env, "%s\n", reg_type_str(env, compatible->types[j])); 9476 return -EACCES; 9477 9478 found: 9479 if (base_type(reg->type) != PTR_TO_BTF_ID) 9480 return 0; 9481 9482 if (compatible == &mem_types) { 9483 if (!(arg_type & MEM_RDONLY)) { 9484 verbose(env, 9485 "%s() may write into memory pointed by R%d type=%s\n", 9486 func_id_name(meta->func_id), 9487 regno, reg_type_str(env, reg->type)); 9488 return -EACCES; 9489 } 9490 return 0; 9491 } 9492 9493 switch ((int)reg->type) { 9494 case PTR_TO_BTF_ID: 9495 case PTR_TO_BTF_ID | PTR_TRUSTED: 9496 case PTR_TO_BTF_ID | PTR_TRUSTED | PTR_MAYBE_NULL: 9497 case PTR_TO_BTF_ID | MEM_RCU: 9498 case PTR_TO_BTF_ID | PTR_MAYBE_NULL: 9499 case PTR_TO_BTF_ID | PTR_MAYBE_NULL | MEM_RCU: 9500 { 9501 /* For bpf_sk_release, it needs to match against first member 9502 * 'struct sock_common', hence make an exception for it. This 9503 * allows bpf_sk_release to work for multiple socket types. 
9504 */ 9505 bool strict_type_match = arg_type_is_release(arg_type) && 9506 meta->func_id != BPF_FUNC_sk_release; 9507 9508 if (type_may_be_null(reg->type) && 9509 (!type_may_be_null(arg_type) || arg_type_is_release(arg_type))) { 9510 verbose(env, "Possibly NULL pointer passed to helper arg%d\n", regno); 9511 return -EACCES; 9512 } 9513 9514 if (!arg_btf_id) { 9515 if (!compatible->btf_id) { 9516 verifier_bug(env, "missing arg compatible BTF ID"); 9517 return -EFAULT; 9518 } 9519 arg_btf_id = compatible->btf_id; 9520 } 9521 9522 if (meta->func_id == BPF_FUNC_kptr_xchg) { 9523 if (map_kptr_match_type(env, meta->kptr_field, reg, regno)) 9524 return -EACCES; 9525 } else { 9526 if (arg_btf_id == BPF_PTR_POISON) { 9527 verbose(env, "verifier internal error:"); 9528 verbose(env, "R%d has non-overwritten BPF_PTR_POISON type\n", 9529 regno); 9530 return -EACCES; 9531 } 9532 9533 if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->off, 9534 btf_vmlinux, *arg_btf_id, 9535 strict_type_match)) { 9536 verbose(env, "R%d is of type %s but %s is expected\n", 9537 regno, btf_type_name(reg->btf, reg->btf_id), 9538 btf_type_name(btf_vmlinux, *arg_btf_id)); 9539 return -EACCES; 9540 } 9541 } 9542 break; 9543 } 9544 case PTR_TO_BTF_ID | MEM_ALLOC: 9545 case PTR_TO_BTF_ID | MEM_PERCPU | MEM_ALLOC: 9546 if (meta->func_id != BPF_FUNC_spin_lock && meta->func_id != BPF_FUNC_spin_unlock && 9547 meta->func_id != BPF_FUNC_kptr_xchg) { 9548 verifier_bug(env, "unimplemented handling of MEM_ALLOC"); 9549 return -EFAULT; 9550 } 9551 /* Check if local kptr in src arg matches kptr in dst arg */ 9552 if (meta->func_id == BPF_FUNC_kptr_xchg && regno == BPF_REG_2) { 9553 if (map_kptr_match_type(env, meta->kptr_field, reg, regno)) 9554 return -EACCES; 9555 } 9556 break; 9557 case PTR_TO_BTF_ID | MEM_PERCPU: 9558 case PTR_TO_BTF_ID | MEM_PERCPU | MEM_RCU: 9559 case PTR_TO_BTF_ID | MEM_PERCPU | PTR_TRUSTED: 9560 /* Handled by helper specific checks */ 9561 break; 9562 default: 9563 
verifier_bug(env, "invalid PTR_TO_BTF_ID register for type match"); 9564 return -EFAULT; 9565 } 9566 return 0; 9567 } 9568 9569 static struct btf_field * 9570 reg_find_field_offset(const struct bpf_reg_state *reg, s32 off, u32 fields) 9571 { 9572 struct btf_field *field; 9573 struct btf_record *rec; 9574 9575 rec = reg_btf_record(reg); 9576 if (!rec) 9577 return NULL; 9578 9579 field = btf_record_find(rec, off, fields); 9580 if (!field) 9581 return NULL; 9582 9583 return field; 9584 } 9585 9586 static int check_func_arg_reg_off(struct bpf_verifier_env *env, 9587 const struct bpf_reg_state *reg, int regno, 9588 enum bpf_arg_type arg_type) 9589 { 9590 u32 type = reg->type; 9591 9592 /* When referenced register is passed to release function, its fixed 9593 * offset must be 0. 9594 * 9595 * We will check arg_type_is_release reg has ref_obj_id when storing 9596 * meta->release_regno. 9597 */ 9598 if (arg_type_is_release(arg_type)) { 9599 /* ARG_PTR_TO_DYNPTR with OBJ_RELEASE is a bit special, as it 9600 * may not directly point to the object being released, but to 9601 * dynptr pointing to such object, which might be at some offset 9602 * on the stack. In that case, we simply to fallback to the 9603 * default handling. 9604 */ 9605 if (arg_type_is_dynptr(arg_type) && type == PTR_TO_STACK) 9606 return 0; 9607 9608 /* Doing check_ptr_off_reg check for the offset will catch this 9609 * because fixed_off_ok is false, but checking here allows us 9610 * to give the user a better error message. 
9611 */ 9612 if (reg->off) { 9613 verbose(env, "R%d must have zero offset when passed to release func or trusted arg to kfunc\n", 9614 regno); 9615 return -EINVAL; 9616 } 9617 return __check_ptr_off_reg(env, reg, regno, false); 9618 } 9619 9620 switch (type) { 9621 /* Pointer types where both fixed and variable offset is explicitly allowed: */ 9622 case PTR_TO_STACK: 9623 case PTR_TO_PACKET: 9624 case PTR_TO_PACKET_META: 9625 case PTR_TO_MAP_KEY: 9626 case PTR_TO_MAP_VALUE: 9627 case PTR_TO_MEM: 9628 case PTR_TO_MEM | MEM_RDONLY: 9629 case PTR_TO_MEM | MEM_RINGBUF: 9630 case PTR_TO_BUF: 9631 case PTR_TO_BUF | MEM_RDONLY: 9632 case PTR_TO_ARENA: 9633 case SCALAR_VALUE: 9634 return 0; 9635 /* All the rest must be rejected, except PTR_TO_BTF_ID which allows 9636 * fixed offset. 9637 */ 9638 case PTR_TO_BTF_ID: 9639 case PTR_TO_BTF_ID | MEM_ALLOC: 9640 case PTR_TO_BTF_ID | PTR_TRUSTED: 9641 case PTR_TO_BTF_ID | MEM_RCU: 9642 case PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF: 9643 case PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF | MEM_RCU: 9644 /* When referenced PTR_TO_BTF_ID is passed to release function, 9645 * its fixed offset must be 0. In the other cases, fixed offset 9646 * can be non-zero. This was already checked above. So pass 9647 * fixed_off_ok as true to allow fixed offset for all other 9648 * cases. var_off always must be 0 for PTR_TO_BTF_ID, hence we 9649 * still need to do checks instead of returning. 
9650 */ 9651 return __check_ptr_off_reg(env, reg, regno, true); 9652 default: 9653 return __check_ptr_off_reg(env, reg, regno, false); 9654 } 9655 } 9656 9657 static struct bpf_reg_state *get_dynptr_arg_reg(struct bpf_verifier_env *env, 9658 const struct bpf_func_proto *fn, 9659 struct bpf_reg_state *regs) 9660 { 9661 struct bpf_reg_state *state = NULL; 9662 int i; 9663 9664 for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) 9665 if (arg_type_is_dynptr(fn->arg_type[i])) { 9666 if (state) { 9667 verbose(env, "verifier internal error: multiple dynptr args\n"); 9668 return NULL; 9669 } 9670 state = ®s[BPF_REG_1 + i]; 9671 } 9672 9673 if (!state) 9674 verbose(env, "verifier internal error: no dynptr arg found\n"); 9675 9676 return state; 9677 } 9678 9679 static int dynptr_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg) 9680 { 9681 struct bpf_func_state *state = func(env, reg); 9682 int spi; 9683 9684 if (reg->type == CONST_PTR_TO_DYNPTR) 9685 return reg->id; 9686 spi = dynptr_get_spi(env, reg); 9687 if (spi < 0) 9688 return spi; 9689 return state->stack[spi].spilled_ptr.id; 9690 } 9691 9692 static int dynptr_ref_obj_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg) 9693 { 9694 struct bpf_func_state *state = func(env, reg); 9695 int spi; 9696 9697 if (reg->type == CONST_PTR_TO_DYNPTR) 9698 return reg->ref_obj_id; 9699 spi = dynptr_get_spi(env, reg); 9700 if (spi < 0) 9701 return spi; 9702 return state->stack[spi].spilled_ptr.ref_obj_id; 9703 } 9704 9705 static enum bpf_dynptr_type dynptr_get_type(struct bpf_verifier_env *env, 9706 struct bpf_reg_state *reg) 9707 { 9708 struct bpf_func_state *state = func(env, reg); 9709 int spi; 9710 9711 if (reg->type == CONST_PTR_TO_DYNPTR) 9712 return reg->dynptr.type; 9713 9714 spi = __get_spi(reg->off); 9715 if (spi < 0) { 9716 verbose(env, "verifier internal error: invalid spi when querying dynptr type\n"); 9717 return BPF_DYNPTR_TYPE_INVALID; 9718 } 9719 9720 return state->stack[spi].spilled_ptr.dynptr.type; 9721 } 
9722 9723 static int check_reg_const_str(struct bpf_verifier_env *env, 9724 struct bpf_reg_state *reg, u32 regno) 9725 { 9726 struct bpf_map *map = reg->map_ptr; 9727 int err; 9728 int map_off; 9729 u64 map_addr; 9730 char *str_ptr; 9731 9732 if (reg->type != PTR_TO_MAP_VALUE) 9733 return -EINVAL; 9734 9735 if (map->map_type == BPF_MAP_TYPE_INSN_ARRAY) { 9736 verbose(env, "R%d points to insn_array map which cannot be used as const string\n", regno); 9737 return -EACCES; 9738 } 9739 9740 if (!bpf_map_is_rdonly(map)) { 9741 verbose(env, "R%d does not point to a readonly map'\n", regno); 9742 return -EACCES; 9743 } 9744 9745 if (!tnum_is_const(reg->var_off)) { 9746 verbose(env, "R%d is not a constant address'\n", regno); 9747 return -EACCES; 9748 } 9749 9750 if (!map->ops->map_direct_value_addr) { 9751 verbose(env, "no direct value access support for this map type\n"); 9752 return -EACCES; 9753 } 9754 9755 err = check_map_access(env, regno, reg->off, 9756 map->value_size - reg->off, false, 9757 ACCESS_HELPER); 9758 if (err) 9759 return err; 9760 9761 map_off = reg->off + reg->var_off.value; 9762 err = map->ops->map_direct_value_addr(map, &map_addr, map_off); 9763 if (err) { 9764 verbose(env, "direct value access on string failed\n"); 9765 return err; 9766 } 9767 9768 str_ptr = (char *)(long)(map_addr); 9769 if (!strnchr(str_ptr + map_off, map->value_size - map_off, 0)) { 9770 verbose(env, "string is not zero-terminated\n"); 9771 return -EINVAL; 9772 } 9773 return 0; 9774 } 9775 9776 /* Returns constant key value in `value` if possible, else negative error */ 9777 static int get_constant_map_key(struct bpf_verifier_env *env, 9778 struct bpf_reg_state *key, 9779 u32 key_size, 9780 s64 *value) 9781 { 9782 struct bpf_func_state *state = func(env, key); 9783 struct bpf_reg_state *reg; 9784 int slot, spi, off; 9785 int spill_size = 0; 9786 int zero_size = 0; 9787 int stack_off; 9788 int i, err; 9789 u8 *stype; 9790 9791 if (!env->bpf_capable) 9792 return -EOPNOTSUPP; 9793 
if (key->type != PTR_TO_STACK) 9794 return -EOPNOTSUPP; 9795 if (!tnum_is_const(key->var_off)) 9796 return -EOPNOTSUPP; 9797 9798 stack_off = key->off + key->var_off.value; 9799 slot = -stack_off - 1; 9800 spi = slot / BPF_REG_SIZE; 9801 off = slot % BPF_REG_SIZE; 9802 stype = state->stack[spi].slot_type; 9803 9804 /* First handle precisely tracked STACK_ZERO */ 9805 for (i = off; i >= 0 && stype[i] == STACK_ZERO; i--) 9806 zero_size++; 9807 if (zero_size >= key_size) { 9808 *value = 0; 9809 return 0; 9810 } 9811 9812 /* Check that stack contains a scalar spill of expected size */ 9813 if (!is_spilled_scalar_reg(&state->stack[spi])) 9814 return -EOPNOTSUPP; 9815 for (i = off; i >= 0 && stype[i] == STACK_SPILL; i--) 9816 spill_size++; 9817 if (spill_size != key_size) 9818 return -EOPNOTSUPP; 9819 9820 reg = &state->stack[spi].spilled_ptr; 9821 if (!tnum_is_const(reg->var_off)) 9822 /* Stack value not statically known */ 9823 return -EOPNOTSUPP; 9824 9825 /* We are relying on a constant value. So mark as precise 9826 * to prevent pruning on it. 
9827 */ 9828 bt_set_frame_slot(&env->bt, key->frameno, spi); 9829 err = mark_chain_precision_batch(env, env->cur_state); 9830 if (err < 0) 9831 return err; 9832 9833 *value = reg->var_off.value; 9834 return 0; 9835 } 9836 9837 static bool can_elide_value_nullness(enum bpf_map_type type); 9838 9839 static int check_func_arg(struct bpf_verifier_env *env, u32 arg, 9840 struct bpf_call_arg_meta *meta, 9841 const struct bpf_func_proto *fn, 9842 int insn_idx) 9843 { 9844 u32 regno = BPF_REG_1 + arg; 9845 struct bpf_reg_state *reg = reg_state(env, regno); 9846 enum bpf_arg_type arg_type = fn->arg_type[arg]; 9847 enum bpf_reg_type type = reg->type; 9848 u32 *arg_btf_id = NULL; 9849 u32 key_size; 9850 int err = 0; 9851 9852 if (arg_type == ARG_DONTCARE) 9853 return 0; 9854 9855 err = check_reg_arg(env, regno, SRC_OP); 9856 if (err) 9857 return err; 9858 9859 if (arg_type == ARG_ANYTHING) { 9860 if (is_pointer_value(env, regno)) { 9861 verbose(env, "R%d leaks addr into helper function\n", 9862 regno); 9863 return -EACCES; 9864 } 9865 return 0; 9866 } 9867 9868 if (type_is_pkt_pointer(type) && 9869 !may_access_direct_pkt_data(env, meta, BPF_READ)) { 9870 verbose(env, "helper access to the packet is not allowed\n"); 9871 return -EACCES; 9872 } 9873 9874 if (base_type(arg_type) == ARG_PTR_TO_MAP_VALUE) { 9875 err = resolve_map_arg_type(env, meta, &arg_type); 9876 if (err) 9877 return err; 9878 } 9879 9880 if (register_is_null(reg) && type_may_be_null(arg_type)) 9881 /* A NULL register has a SCALAR_VALUE type, so skip 9882 * type checking. 9883 */ 9884 goto skip_type_check; 9885 9886 /* arg_btf_id and arg_size are in a union. 
*/ 9887 if (base_type(arg_type) == ARG_PTR_TO_BTF_ID || 9888 base_type(arg_type) == ARG_PTR_TO_SPIN_LOCK) 9889 arg_btf_id = fn->arg_btf_id[arg]; 9890 9891 err = check_reg_type(env, regno, arg_type, arg_btf_id, meta); 9892 if (err) 9893 return err; 9894 9895 err = check_func_arg_reg_off(env, reg, regno, arg_type); 9896 if (err) 9897 return err; 9898 9899 skip_type_check: 9900 if (arg_type_is_release(arg_type)) { 9901 if (arg_type_is_dynptr(arg_type)) { 9902 struct bpf_func_state *state = func(env, reg); 9903 int spi; 9904 9905 /* Only dynptr created on stack can be released, thus 9906 * the get_spi and stack state checks for spilled_ptr 9907 * should only be done before process_dynptr_func for 9908 * PTR_TO_STACK. 9909 */ 9910 if (reg->type == PTR_TO_STACK) { 9911 spi = dynptr_get_spi(env, reg); 9912 if (spi < 0 || !state->stack[spi].spilled_ptr.ref_obj_id) { 9913 verbose(env, "arg %d is an unacquired reference\n", regno); 9914 return -EINVAL; 9915 } 9916 } else { 9917 verbose(env, "cannot release unowned const bpf_dynptr\n"); 9918 return -EINVAL; 9919 } 9920 } else if (!reg->ref_obj_id && !register_is_null(reg)) { 9921 verbose(env, "R%d must be referenced when passed to release function\n", 9922 regno); 9923 return -EINVAL; 9924 } 9925 if (meta->release_regno) { 9926 verifier_bug(env, "more than one release argument"); 9927 return -EFAULT; 9928 } 9929 meta->release_regno = regno; 9930 } 9931 9932 if (reg->ref_obj_id && base_type(arg_type) != ARG_KPTR_XCHG_DEST) { 9933 if (meta->ref_obj_id) { 9934 verbose(env, "more than one arg with ref_obj_id R%d %u %u", 9935 regno, reg->ref_obj_id, 9936 meta->ref_obj_id); 9937 return -EACCES; 9938 } 9939 meta->ref_obj_id = reg->ref_obj_id; 9940 } 9941 9942 switch (base_type(arg_type)) { 9943 case ARG_CONST_MAP_PTR: 9944 /* bpf_map_xxx(map_ptr) call: remember that map_ptr */ 9945 if (meta->map.ptr) { 9946 /* Use map_uid (which is unique id of inner map) to reject: 9947 * inner_map1 = bpf_map_lookup_elem(outer_map, key1) 9948 * 
inner_map2 = bpf_map_lookup_elem(outer_map, key2) 9949 * if (inner_map1 && inner_map2) { 9950 * timer = bpf_map_lookup_elem(inner_map1); 9951 * if (timer) 9952 * // mismatch would have been allowed 9953 * bpf_timer_init(timer, inner_map2); 9954 * } 9955 * 9956 * Comparing map_ptr is enough to distinguish normal and outer maps. 9957 */ 9958 if (meta->map.ptr != reg->map_ptr || 9959 meta->map.uid != reg->map_uid) { 9960 verbose(env, 9961 "timer pointer in R1 map_uid=%d doesn't match map pointer in R2 map_uid=%d\n", 9962 meta->map.uid, reg->map_uid); 9963 return -EINVAL; 9964 } 9965 } 9966 meta->map.ptr = reg->map_ptr; 9967 meta->map.uid = reg->map_uid; 9968 break; 9969 case ARG_PTR_TO_MAP_KEY: 9970 /* bpf_map_xxx(..., map_ptr, ..., key) call: 9971 * check that [key, key + map->key_size) are within 9972 * stack limits and initialized 9973 */ 9974 if (!meta->map.ptr) { 9975 /* in function declaration map_ptr must come before 9976 * map_key, so that it's verified and known before 9977 * we have to check map_key here. 
Otherwise it means 9978 * that kernel subsystem misconfigured verifier 9979 */ 9980 verifier_bug(env, "invalid map_ptr to access map->key"); 9981 return -EFAULT; 9982 } 9983 key_size = meta->map.ptr->key_size; 9984 err = check_helper_mem_access(env, regno, key_size, BPF_READ, false, NULL); 9985 if (err) 9986 return err; 9987 if (can_elide_value_nullness(meta->map.ptr->map_type)) { 9988 err = get_constant_map_key(env, reg, key_size, &meta->const_map_key); 9989 if (err < 0) { 9990 meta->const_map_key = -1; 9991 if (err == -EOPNOTSUPP) 9992 err = 0; 9993 else 9994 return err; 9995 } 9996 } 9997 break; 9998 case ARG_PTR_TO_MAP_VALUE: 9999 if (type_may_be_null(arg_type) && register_is_null(reg)) 10000 return 0; 10001 10002 /* bpf_map_xxx(..., map_ptr, ..., value) call: 10003 * check [value, value + map->value_size) validity 10004 */ 10005 if (!meta->map.ptr) { 10006 /* kernel subsystem misconfigured verifier */ 10007 verifier_bug(env, "invalid map_ptr to access map->value"); 10008 return -EFAULT; 10009 } 10010 meta->raw_mode = arg_type & MEM_UNINIT; 10011 err = check_helper_mem_access(env, regno, meta->map.ptr->value_size, 10012 arg_type & MEM_WRITE ? 
BPF_WRITE : BPF_READ, 10013 false, meta); 10014 break; 10015 case ARG_PTR_TO_PERCPU_BTF_ID: 10016 if (!reg->btf_id) { 10017 verbose(env, "Helper has invalid btf_id in R%d\n", regno); 10018 return -EACCES; 10019 } 10020 meta->ret_btf = reg->btf; 10021 meta->ret_btf_id = reg->btf_id; 10022 break; 10023 case ARG_PTR_TO_SPIN_LOCK: 10024 if (in_rbtree_lock_required_cb(env)) { 10025 verbose(env, "can't spin_{lock,unlock} in rbtree cb\n"); 10026 return -EACCES; 10027 } 10028 if (meta->func_id == BPF_FUNC_spin_lock) { 10029 err = process_spin_lock(env, regno, PROCESS_SPIN_LOCK); 10030 if (err) 10031 return err; 10032 } else if (meta->func_id == BPF_FUNC_spin_unlock) { 10033 err = process_spin_lock(env, regno, 0); 10034 if (err) 10035 return err; 10036 } else { 10037 verifier_bug(env, "spin lock arg on unexpected helper"); 10038 return -EFAULT; 10039 } 10040 break; 10041 case ARG_PTR_TO_TIMER: 10042 err = process_timer_helper(env, regno, meta); 10043 if (err) 10044 return err; 10045 break; 10046 case ARG_PTR_TO_FUNC: 10047 meta->subprogno = reg->subprogno; 10048 break; 10049 case ARG_PTR_TO_MEM: 10050 /* The access to this pointer is only checked when we hit the 10051 * next is_mem_size argument below. 10052 */ 10053 meta->raw_mode = arg_type & MEM_UNINIT; 10054 if (arg_type & MEM_FIXED_SIZE) { 10055 err = check_helper_mem_access(env, regno, fn->arg_size[arg], 10056 arg_type & MEM_WRITE ? BPF_WRITE : BPF_READ, 10057 false, meta); 10058 if (err) 10059 return err; 10060 if (arg_type & MEM_ALIGNED) 10061 err = check_ptr_alignment(env, reg, 0, fn->arg_size[arg], true); 10062 } 10063 break; 10064 case ARG_CONST_SIZE: 10065 err = check_mem_size_reg(env, reg, regno, 10066 fn->arg_type[arg - 1] & MEM_WRITE ? 10067 BPF_WRITE : BPF_READ, 10068 false, meta); 10069 break; 10070 case ARG_CONST_SIZE_OR_ZERO: 10071 err = check_mem_size_reg(env, reg, regno, 10072 fn->arg_type[arg - 1] & MEM_WRITE ? 
10073 BPF_WRITE : BPF_READ, 10074 true, meta); 10075 break; 10076 case ARG_PTR_TO_DYNPTR: 10077 err = process_dynptr_func(env, regno, insn_idx, arg_type, 0); 10078 if (err) 10079 return err; 10080 break; 10081 case ARG_CONST_ALLOC_SIZE_OR_ZERO: 10082 if (!tnum_is_const(reg->var_off)) { 10083 verbose(env, "R%d is not a known constant'\n", 10084 regno); 10085 return -EACCES; 10086 } 10087 meta->mem_size = reg->var_off.value; 10088 err = mark_chain_precision(env, regno); 10089 if (err) 10090 return err; 10091 break; 10092 case ARG_PTR_TO_CONST_STR: 10093 { 10094 err = check_reg_const_str(env, reg, regno); 10095 if (err) 10096 return err; 10097 break; 10098 } 10099 case ARG_KPTR_XCHG_DEST: 10100 err = process_kptr_func(env, regno, meta); 10101 if (err) 10102 return err; 10103 break; 10104 } 10105 10106 return err; 10107 } 10108 10109 static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id) 10110 { 10111 enum bpf_attach_type eatype = env->prog->expected_attach_type; 10112 enum bpf_prog_type type = resolve_prog_type(env->prog); 10113 10114 if (func_id != BPF_FUNC_map_update_elem && 10115 func_id != BPF_FUNC_map_delete_elem) 10116 return false; 10117 10118 /* It's not possible to get access to a locked struct sock in these 10119 * contexts, so updating is safe. 
10120 */ 10121 switch (type) { 10122 case BPF_PROG_TYPE_TRACING: 10123 if (eatype == BPF_TRACE_ITER) 10124 return true; 10125 break; 10126 case BPF_PROG_TYPE_SOCK_OPS: 10127 /* map_update allowed only via dedicated helpers with event type checks */ 10128 if (func_id == BPF_FUNC_map_delete_elem) 10129 return true; 10130 break; 10131 case BPF_PROG_TYPE_SOCKET_FILTER: 10132 case BPF_PROG_TYPE_SCHED_CLS: 10133 case BPF_PROG_TYPE_SCHED_ACT: 10134 case BPF_PROG_TYPE_XDP: 10135 case BPF_PROG_TYPE_SK_REUSEPORT: 10136 case BPF_PROG_TYPE_FLOW_DISSECTOR: 10137 case BPF_PROG_TYPE_SK_LOOKUP: 10138 return true; 10139 default: 10140 break; 10141 } 10142 10143 verbose(env, "cannot update sockmap in this context\n"); 10144 return false; 10145 } 10146 10147 static bool allow_tail_call_in_subprogs(struct bpf_verifier_env *env) 10148 { 10149 return env->prog->jit_requested && 10150 bpf_jit_supports_subprog_tailcalls(); 10151 } 10152 10153 static int check_map_func_compatibility(struct bpf_verifier_env *env, 10154 struct bpf_map *map, int func_id) 10155 { 10156 if (!map) 10157 return 0; 10158 10159 /* We need a two way check, first is from map perspective ... 
*/ 10160 switch (map->map_type) { 10161 case BPF_MAP_TYPE_PROG_ARRAY: 10162 if (func_id != BPF_FUNC_tail_call) 10163 goto error; 10164 break; 10165 case BPF_MAP_TYPE_PERF_EVENT_ARRAY: 10166 if (func_id != BPF_FUNC_perf_event_read && 10167 func_id != BPF_FUNC_perf_event_output && 10168 func_id != BPF_FUNC_skb_output && 10169 func_id != BPF_FUNC_perf_event_read_value && 10170 func_id != BPF_FUNC_xdp_output) 10171 goto error; 10172 break; 10173 case BPF_MAP_TYPE_RINGBUF: 10174 if (func_id != BPF_FUNC_ringbuf_output && 10175 func_id != BPF_FUNC_ringbuf_reserve && 10176 func_id != BPF_FUNC_ringbuf_query && 10177 func_id != BPF_FUNC_ringbuf_reserve_dynptr && 10178 func_id != BPF_FUNC_ringbuf_submit_dynptr && 10179 func_id != BPF_FUNC_ringbuf_discard_dynptr) 10180 goto error; 10181 break; 10182 case BPF_MAP_TYPE_USER_RINGBUF: 10183 if (func_id != BPF_FUNC_user_ringbuf_drain) 10184 goto error; 10185 break; 10186 case BPF_MAP_TYPE_STACK_TRACE: 10187 if (func_id != BPF_FUNC_get_stackid) 10188 goto error; 10189 break; 10190 case BPF_MAP_TYPE_CGROUP_ARRAY: 10191 if (func_id != BPF_FUNC_skb_under_cgroup && 10192 func_id != BPF_FUNC_current_task_under_cgroup) 10193 goto error; 10194 break; 10195 case BPF_MAP_TYPE_CGROUP_STORAGE: 10196 case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE: 10197 if (func_id != BPF_FUNC_get_local_storage) 10198 goto error; 10199 break; 10200 case BPF_MAP_TYPE_DEVMAP: 10201 case BPF_MAP_TYPE_DEVMAP_HASH: 10202 if (func_id != BPF_FUNC_redirect_map && 10203 func_id != BPF_FUNC_map_lookup_elem) 10204 goto error; 10205 break; 10206 /* Restrict bpf side of cpumap and xskmap, open when use-cases 10207 * appear. 
10208 */ 10209 case BPF_MAP_TYPE_CPUMAP: 10210 if (func_id != BPF_FUNC_redirect_map) 10211 goto error; 10212 break; 10213 case BPF_MAP_TYPE_XSKMAP: 10214 if (func_id != BPF_FUNC_redirect_map && 10215 func_id != BPF_FUNC_map_lookup_elem) 10216 goto error; 10217 break; 10218 case BPF_MAP_TYPE_ARRAY_OF_MAPS: 10219 case BPF_MAP_TYPE_HASH_OF_MAPS: 10220 if (func_id != BPF_FUNC_map_lookup_elem) 10221 goto error; 10222 break; 10223 case BPF_MAP_TYPE_SOCKMAP: 10224 if (func_id != BPF_FUNC_sk_redirect_map && 10225 func_id != BPF_FUNC_sock_map_update && 10226 func_id != BPF_FUNC_msg_redirect_map && 10227 func_id != BPF_FUNC_sk_select_reuseport && 10228 func_id != BPF_FUNC_map_lookup_elem && 10229 !may_update_sockmap(env, func_id)) 10230 goto error; 10231 break; 10232 case BPF_MAP_TYPE_SOCKHASH: 10233 if (func_id != BPF_FUNC_sk_redirect_hash && 10234 func_id != BPF_FUNC_sock_hash_update && 10235 func_id != BPF_FUNC_msg_redirect_hash && 10236 func_id != BPF_FUNC_sk_select_reuseport && 10237 func_id != BPF_FUNC_map_lookup_elem && 10238 !may_update_sockmap(env, func_id)) 10239 goto error; 10240 break; 10241 case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY: 10242 if (func_id != BPF_FUNC_sk_select_reuseport) 10243 goto error; 10244 break; 10245 case BPF_MAP_TYPE_QUEUE: 10246 case BPF_MAP_TYPE_STACK: 10247 if (func_id != BPF_FUNC_map_peek_elem && 10248 func_id != BPF_FUNC_map_pop_elem && 10249 func_id != BPF_FUNC_map_push_elem) 10250 goto error; 10251 break; 10252 case BPF_MAP_TYPE_SK_STORAGE: 10253 if (func_id != BPF_FUNC_sk_storage_get && 10254 func_id != BPF_FUNC_sk_storage_delete && 10255 func_id != BPF_FUNC_kptr_xchg) 10256 goto error; 10257 break; 10258 case BPF_MAP_TYPE_INODE_STORAGE: 10259 if (func_id != BPF_FUNC_inode_storage_get && 10260 func_id != BPF_FUNC_inode_storage_delete && 10261 func_id != BPF_FUNC_kptr_xchg) 10262 goto error; 10263 break; 10264 case BPF_MAP_TYPE_TASK_STORAGE: 10265 if (func_id != BPF_FUNC_task_storage_get && 10266 func_id != BPF_FUNC_task_storage_delete && 
10267 func_id != BPF_FUNC_kptr_xchg) 10268 goto error; 10269 break; 10270 case BPF_MAP_TYPE_CGRP_STORAGE: 10271 if (func_id != BPF_FUNC_cgrp_storage_get && 10272 func_id != BPF_FUNC_cgrp_storage_delete && 10273 func_id != BPF_FUNC_kptr_xchg) 10274 goto error; 10275 break; 10276 case BPF_MAP_TYPE_BLOOM_FILTER: 10277 if (func_id != BPF_FUNC_map_peek_elem && 10278 func_id != BPF_FUNC_map_push_elem) 10279 goto error; 10280 break; 10281 case BPF_MAP_TYPE_INSN_ARRAY: 10282 goto error; 10283 default: 10284 break; 10285 } 10286 10287 /* ... and second from the function itself. */ 10288 switch (func_id) { 10289 case BPF_FUNC_tail_call: 10290 if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY) 10291 goto error; 10292 if (env->subprog_cnt > 1 && !allow_tail_call_in_subprogs(env)) { 10293 verbose(env, "mixing of tail_calls and bpf-to-bpf calls is not supported\n"); 10294 return -EINVAL; 10295 } 10296 break; 10297 case BPF_FUNC_perf_event_read: 10298 case BPF_FUNC_perf_event_output: 10299 case BPF_FUNC_perf_event_read_value: 10300 case BPF_FUNC_skb_output: 10301 case BPF_FUNC_xdp_output: 10302 if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) 10303 goto error; 10304 break; 10305 case BPF_FUNC_ringbuf_output: 10306 case BPF_FUNC_ringbuf_reserve: 10307 case BPF_FUNC_ringbuf_query: 10308 case BPF_FUNC_ringbuf_reserve_dynptr: 10309 case BPF_FUNC_ringbuf_submit_dynptr: 10310 case BPF_FUNC_ringbuf_discard_dynptr: 10311 if (map->map_type != BPF_MAP_TYPE_RINGBUF) 10312 goto error; 10313 break; 10314 case BPF_FUNC_user_ringbuf_drain: 10315 if (map->map_type != BPF_MAP_TYPE_USER_RINGBUF) 10316 goto error; 10317 break; 10318 case BPF_FUNC_get_stackid: 10319 if (map->map_type != BPF_MAP_TYPE_STACK_TRACE) 10320 goto error; 10321 break; 10322 case BPF_FUNC_current_task_under_cgroup: 10323 case BPF_FUNC_skb_under_cgroup: 10324 if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY) 10325 goto error; 10326 break; 10327 case BPF_FUNC_redirect_map: 10328 if (map->map_type != BPF_MAP_TYPE_DEVMAP && 10329 
map->map_type != BPF_MAP_TYPE_DEVMAP_HASH && 10330 map->map_type != BPF_MAP_TYPE_CPUMAP && 10331 map->map_type != BPF_MAP_TYPE_XSKMAP) 10332 goto error; 10333 break; 10334 case BPF_FUNC_sk_redirect_map: 10335 case BPF_FUNC_msg_redirect_map: 10336 case BPF_FUNC_sock_map_update: 10337 if (map->map_type != BPF_MAP_TYPE_SOCKMAP) 10338 goto error; 10339 break; 10340 case BPF_FUNC_sk_redirect_hash: 10341 case BPF_FUNC_msg_redirect_hash: 10342 case BPF_FUNC_sock_hash_update: 10343 if (map->map_type != BPF_MAP_TYPE_SOCKHASH) 10344 goto error; 10345 break; 10346 case BPF_FUNC_get_local_storage: 10347 if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE && 10348 map->map_type != BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) 10349 goto error; 10350 break; 10351 case BPF_FUNC_sk_select_reuseport: 10352 if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY && 10353 map->map_type != BPF_MAP_TYPE_SOCKMAP && 10354 map->map_type != BPF_MAP_TYPE_SOCKHASH) 10355 goto error; 10356 break; 10357 case BPF_FUNC_map_pop_elem: 10358 if (map->map_type != BPF_MAP_TYPE_QUEUE && 10359 map->map_type != BPF_MAP_TYPE_STACK) 10360 goto error; 10361 break; 10362 case BPF_FUNC_map_peek_elem: 10363 case BPF_FUNC_map_push_elem: 10364 if (map->map_type != BPF_MAP_TYPE_QUEUE && 10365 map->map_type != BPF_MAP_TYPE_STACK && 10366 map->map_type != BPF_MAP_TYPE_BLOOM_FILTER) 10367 goto error; 10368 break; 10369 case BPF_FUNC_map_lookup_percpu_elem: 10370 if (map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY && 10371 map->map_type != BPF_MAP_TYPE_PERCPU_HASH && 10372 map->map_type != BPF_MAP_TYPE_LRU_PERCPU_HASH) 10373 goto error; 10374 break; 10375 case BPF_FUNC_sk_storage_get: 10376 case BPF_FUNC_sk_storage_delete: 10377 if (map->map_type != BPF_MAP_TYPE_SK_STORAGE) 10378 goto error; 10379 break; 10380 case BPF_FUNC_inode_storage_get: 10381 case BPF_FUNC_inode_storage_delete: 10382 if (map->map_type != BPF_MAP_TYPE_INODE_STORAGE) 10383 goto error; 10384 break; 10385 case BPF_FUNC_task_storage_get: 10386 case 
BPF_FUNC_task_storage_delete: 10387 if (map->map_type != BPF_MAP_TYPE_TASK_STORAGE) 10388 goto error; 10389 break; 10390 case BPF_FUNC_cgrp_storage_get: 10391 case BPF_FUNC_cgrp_storage_delete: 10392 if (map->map_type != BPF_MAP_TYPE_CGRP_STORAGE) 10393 goto error; 10394 break; 10395 default: 10396 break; 10397 } 10398 10399 return 0; 10400 error: 10401 verbose(env, "cannot pass map_type %d into func %s#%d\n", 10402 map->map_type, func_id_name(func_id), func_id); 10403 return -EINVAL; 10404 } 10405 10406 static bool check_raw_mode_ok(const struct bpf_func_proto *fn) 10407 { 10408 int count = 0; 10409 10410 if (arg_type_is_raw_mem(fn->arg1_type)) 10411 count++; 10412 if (arg_type_is_raw_mem(fn->arg2_type)) 10413 count++; 10414 if (arg_type_is_raw_mem(fn->arg3_type)) 10415 count++; 10416 if (arg_type_is_raw_mem(fn->arg4_type)) 10417 count++; 10418 if (arg_type_is_raw_mem(fn->arg5_type)) 10419 count++; 10420 10421 /* We only support one arg being in raw mode at the moment, 10422 * which is sufficient for the helper functions we have 10423 * right now. 10424 */ 10425 return count <= 1; 10426 } 10427 10428 static bool check_args_pair_invalid(const struct bpf_func_proto *fn, int arg) 10429 { 10430 bool is_fixed = fn->arg_type[arg] & MEM_FIXED_SIZE; 10431 bool has_size = fn->arg_size[arg] != 0; 10432 bool is_next_size = false; 10433 10434 if (arg + 1 < ARRAY_SIZE(fn->arg_type)) 10435 is_next_size = arg_type_is_mem_size(fn->arg_type[arg + 1]); 10436 10437 if (base_type(fn->arg_type[arg]) != ARG_PTR_TO_MEM) 10438 return is_next_size; 10439 10440 return has_size == is_next_size || is_next_size == is_fixed; 10441 } 10442 10443 static bool check_arg_pair_ok(const struct bpf_func_proto *fn) 10444 { 10445 /* bpf_xxx(..., buf, len) call will access 'len' 10446 * bytes from memory 'buf'. Both arg types need 10447 * to be paired, so make sure there's no buggy 10448 * helper function specification. 
10449 */ 10450 if (arg_type_is_mem_size(fn->arg1_type) || 10451 check_args_pair_invalid(fn, 0) || 10452 check_args_pair_invalid(fn, 1) || 10453 check_args_pair_invalid(fn, 2) || 10454 check_args_pair_invalid(fn, 3) || 10455 check_args_pair_invalid(fn, 4)) 10456 return false; 10457 10458 return true; 10459 } 10460 10461 static bool check_btf_id_ok(const struct bpf_func_proto *fn) 10462 { 10463 int i; 10464 10465 for (i = 0; i < ARRAY_SIZE(fn->arg_type); i++) { 10466 if (base_type(fn->arg_type[i]) == ARG_PTR_TO_BTF_ID) 10467 return !!fn->arg_btf_id[i]; 10468 if (base_type(fn->arg_type[i]) == ARG_PTR_TO_SPIN_LOCK) 10469 return fn->arg_btf_id[i] == BPF_PTR_POISON; 10470 if (base_type(fn->arg_type[i]) != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i] && 10471 /* arg_btf_id and arg_size are in a union. */ 10472 (base_type(fn->arg_type[i]) != ARG_PTR_TO_MEM || 10473 !(fn->arg_type[i] & MEM_FIXED_SIZE))) 10474 return false; 10475 } 10476 10477 return true; 10478 } 10479 10480 static bool check_mem_arg_rw_flag_ok(const struct bpf_func_proto *fn) 10481 { 10482 int i; 10483 10484 for (i = 0; i < ARRAY_SIZE(fn->arg_type); i++) { 10485 enum bpf_arg_type arg_type = fn->arg_type[i]; 10486 10487 if (base_type(arg_type) != ARG_PTR_TO_MEM) 10488 continue; 10489 if (!(arg_type & (MEM_WRITE | MEM_RDONLY))) 10490 return false; 10491 } 10492 10493 return true; 10494 } 10495 10496 static int check_func_proto(const struct bpf_func_proto *fn) 10497 { 10498 return check_raw_mode_ok(fn) && 10499 check_arg_pair_ok(fn) && 10500 check_mem_arg_rw_flag_ok(fn) && 10501 check_btf_id_ok(fn) ? 0 : -EINVAL; 10502 } 10503 10504 /* Packet data might have moved, any old PTR_TO_PACKET[_META,_END] 10505 * are now invalid, so turn them into unknown SCALAR_VALUE. 10506 * 10507 * This also applies to dynptr slices belonging to skb and xdp dynptrs, 10508 * since these slices point to packet data. 
10509 */ 10510 static void clear_all_pkt_pointers(struct bpf_verifier_env *env) 10511 { 10512 struct bpf_func_state *state; 10513 struct bpf_reg_state *reg; 10514 10515 bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({ 10516 if (reg_is_pkt_pointer_any(reg) || reg_is_dynptr_slice_pkt(reg)) 10517 mark_reg_invalid(env, reg); 10518 })); 10519 } 10520 10521 enum { 10522 AT_PKT_END = -1, 10523 BEYOND_PKT_END = -2, 10524 }; 10525 10526 static void mark_pkt_end(struct bpf_verifier_state *vstate, int regn, bool range_open) 10527 { 10528 struct bpf_func_state *state = vstate->frame[vstate->curframe]; 10529 struct bpf_reg_state *reg = &state->regs[regn]; 10530 10531 if (reg->type != PTR_TO_PACKET) 10532 /* PTR_TO_PACKET_META is not supported yet */ 10533 return; 10534 10535 /* The 'reg' is pkt > pkt_end or pkt >= pkt_end. 10536 * How far beyond pkt_end it goes is unknown. 10537 * if (!range_open) it's the case of pkt >= pkt_end 10538 * if (range_open) it's the case of pkt > pkt_end 10539 * hence this pointer is at least 1 byte bigger than pkt_end 10540 */ 10541 if (range_open) 10542 reg->range = BEYOND_PKT_END; 10543 else 10544 reg->range = AT_PKT_END; 10545 } 10546 10547 static int release_reference_nomark(struct bpf_verifier_state *state, int ref_obj_id) 10548 { 10549 int i; 10550 10551 for (i = 0; i < state->acquired_refs; i++) { 10552 if (state->refs[i].type != REF_TYPE_PTR) 10553 continue; 10554 if (state->refs[i].id == ref_obj_id) { 10555 release_reference_state(state, i); 10556 return 0; 10557 } 10558 } 10559 return -EINVAL; 10560 } 10561 10562 /* The pointer with the specified id has released its reference to kernel 10563 * resources. Identify all copies of the same pointer and clear the reference. 10564 * 10565 * This is the release function corresponding to acquire_reference(). Idempotent. 
10566 */ 10567 static int release_reference(struct bpf_verifier_env *env, int ref_obj_id) 10568 { 10569 struct bpf_verifier_state *vstate = env->cur_state; 10570 struct bpf_func_state *state; 10571 struct bpf_reg_state *reg; 10572 int err; 10573 10574 err = release_reference_nomark(vstate, ref_obj_id); 10575 if (err) 10576 return err; 10577 10578 bpf_for_each_reg_in_vstate(vstate, state, reg, ({ 10579 if (reg->ref_obj_id == ref_obj_id) 10580 mark_reg_invalid(env, reg); 10581 })); 10582 10583 return 0; 10584 } 10585 10586 static void invalidate_non_owning_refs(struct bpf_verifier_env *env) 10587 { 10588 struct bpf_func_state *unused; 10589 struct bpf_reg_state *reg; 10590 10591 bpf_for_each_reg_in_vstate(env->cur_state, unused, reg, ({ 10592 if (type_is_non_owning_ref(reg->type)) 10593 mark_reg_invalid(env, reg); 10594 })); 10595 } 10596 10597 static void clear_caller_saved_regs(struct bpf_verifier_env *env, 10598 struct bpf_reg_state *regs) 10599 { 10600 int i; 10601 10602 /* after the call registers r0 - r5 were scratched */ 10603 for (i = 0; i < CALLER_SAVED_REGS; i++) { 10604 mark_reg_not_init(env, regs, caller_saved[i]); 10605 __check_reg_arg(env, regs, caller_saved[i], DST_OP_NO_MARK); 10606 } 10607 } 10608 10609 typedef int (*set_callee_state_fn)(struct bpf_verifier_env *env, 10610 struct bpf_func_state *caller, 10611 struct bpf_func_state *callee, 10612 int insn_idx); 10613 10614 static int set_callee_state(struct bpf_verifier_env *env, 10615 struct bpf_func_state *caller, 10616 struct bpf_func_state *callee, int insn_idx); 10617 10618 static int setup_func_entry(struct bpf_verifier_env *env, int subprog, int callsite, 10619 set_callee_state_fn set_callee_state_cb, 10620 struct bpf_verifier_state *state) 10621 { 10622 struct bpf_func_state *caller, *callee; 10623 int err; 10624 10625 if (state->curframe + 1 >= MAX_CALL_FRAMES) { 10626 verbose(env, "the call stack of %d frames is too deep\n", 10627 state->curframe + 2); 10628 return -E2BIG; 10629 } 10630 
10631 if (state->frame[state->curframe + 1]) { 10632 verifier_bug(env, "Frame %d already allocated", state->curframe + 1); 10633 return -EFAULT; 10634 } 10635 10636 caller = state->frame[state->curframe]; 10637 callee = kzalloc_obj(*callee, GFP_KERNEL_ACCOUNT); 10638 if (!callee) 10639 return -ENOMEM; 10640 state->frame[state->curframe + 1] = callee; 10641 10642 /* callee cannot access r0, r6 - r9 for reading and has to write 10643 * into its own stack before reading from it. 10644 * callee can read/write into caller's stack 10645 */ 10646 init_func_state(env, callee, 10647 /* remember the callsite, it will be used by bpf_exit */ 10648 callsite, 10649 state->curframe + 1 /* frameno within this callchain */, 10650 subprog /* subprog number within this prog */); 10651 err = set_callee_state_cb(env, caller, callee, callsite); 10652 if (err) 10653 goto err_out; 10654 10655 /* only increment it after check_reg_arg() finished */ 10656 state->curframe++; 10657 10658 return 0; 10659 10660 err_out: 10661 free_func_state(callee); 10662 state->frame[state->curframe + 1] = NULL; 10663 return err; 10664 } 10665 10666 static int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog, 10667 const struct btf *btf, 10668 struct bpf_reg_state *regs) 10669 { 10670 struct bpf_subprog_info *sub = subprog_info(env, subprog); 10671 struct bpf_verifier_log *log = &env->log; 10672 u32 i; 10673 int ret; 10674 10675 ret = btf_prepare_func_args(env, subprog); 10676 if (ret) 10677 return ret; 10678 10679 /* check that BTF function arguments match actual types that the 10680 * verifier sees. 
10681 */ 10682 for (i = 0; i < sub->arg_cnt; i++) { 10683 u32 regno = i + 1; 10684 struct bpf_reg_state *reg = ®s[regno]; 10685 struct bpf_subprog_arg_info *arg = &sub->args[i]; 10686 10687 if (arg->arg_type == ARG_ANYTHING) { 10688 if (reg->type != SCALAR_VALUE) { 10689 bpf_log(log, "R%d is not a scalar\n", regno); 10690 return -EINVAL; 10691 } 10692 } else if (arg->arg_type & PTR_UNTRUSTED) { 10693 /* 10694 * Anything is allowed for untrusted arguments, as these are 10695 * read-only and probe read instructions would protect against 10696 * invalid memory access. 10697 */ 10698 } else if (arg->arg_type == ARG_PTR_TO_CTX) { 10699 ret = check_func_arg_reg_off(env, reg, regno, ARG_DONTCARE); 10700 if (ret < 0) 10701 return ret; 10702 /* If function expects ctx type in BTF check that caller 10703 * is passing PTR_TO_CTX. 10704 */ 10705 if (reg->type != PTR_TO_CTX) { 10706 bpf_log(log, "arg#%d expects pointer to ctx\n", i); 10707 return -EINVAL; 10708 } 10709 } else if (base_type(arg->arg_type) == ARG_PTR_TO_MEM) { 10710 ret = check_func_arg_reg_off(env, reg, regno, ARG_DONTCARE); 10711 if (ret < 0) 10712 return ret; 10713 if (check_mem_reg(env, reg, regno, arg->mem_size)) 10714 return -EINVAL; 10715 if (!(arg->arg_type & PTR_MAYBE_NULL) && (reg->type & PTR_MAYBE_NULL)) { 10716 bpf_log(log, "arg#%d is expected to be non-NULL\n", i); 10717 return -EINVAL; 10718 } 10719 } else if (base_type(arg->arg_type) == ARG_PTR_TO_ARENA) { 10720 /* 10721 * Can pass any value and the kernel won't crash, but 10722 * only PTR_TO_ARENA or SCALAR make sense. Everything 10723 * else is a bug in the bpf program. Point it out to 10724 * the user at the verification time instead of 10725 * run-time debug nightmare. 
10726 */ 10727 if (reg->type != PTR_TO_ARENA && reg->type != SCALAR_VALUE) { 10728 bpf_log(log, "R%d is not a pointer to arena or scalar.\n", regno); 10729 return -EINVAL; 10730 } 10731 } else if (arg->arg_type == (ARG_PTR_TO_DYNPTR | MEM_RDONLY)) { 10732 ret = check_func_arg_reg_off(env, reg, regno, ARG_PTR_TO_DYNPTR); 10733 if (ret) 10734 return ret; 10735 10736 ret = process_dynptr_func(env, regno, -1, arg->arg_type, 0); 10737 if (ret) 10738 return ret; 10739 } else if (base_type(arg->arg_type) == ARG_PTR_TO_BTF_ID) { 10740 struct bpf_call_arg_meta meta; 10741 int err; 10742 10743 if (register_is_null(reg) && type_may_be_null(arg->arg_type)) 10744 continue; 10745 10746 memset(&meta, 0, sizeof(meta)); /* leave func_id as zero */ 10747 err = check_reg_type(env, regno, arg->arg_type, &arg->btf_id, &meta); 10748 err = err ?: check_func_arg_reg_off(env, reg, regno, arg->arg_type); 10749 if (err) 10750 return err; 10751 } else { 10752 verifier_bug(env, "unrecognized arg#%d type %d", i, arg->arg_type); 10753 return -EFAULT; 10754 } 10755 } 10756 10757 return 0; 10758 } 10759 10760 /* Compare BTF of a function call with given bpf_reg_state. 10761 * Returns: 10762 * EFAULT - there is a verifier bug. Abort verification. 10763 * EINVAL - there is a type mismatch or BTF is not available. 10764 * 0 - BTF matches with what bpf_reg_state expects. 10765 * Only PTR_TO_CTX and SCALAR_VALUE states are recognized. 
10766 */ 10767 static int btf_check_subprog_call(struct bpf_verifier_env *env, int subprog, 10768 struct bpf_reg_state *regs) 10769 { 10770 struct bpf_prog *prog = env->prog; 10771 struct btf *btf = prog->aux->btf; 10772 u32 btf_id; 10773 int err; 10774 10775 if (!prog->aux->func_info) 10776 return -EINVAL; 10777 10778 btf_id = prog->aux->func_info[subprog].type_id; 10779 if (!btf_id) 10780 return -EFAULT; 10781 10782 if (prog->aux->func_info_aux[subprog].unreliable) 10783 return -EINVAL; 10784 10785 err = btf_check_func_arg_match(env, subprog, btf, regs); 10786 /* Compiler optimizations can remove arguments from static functions 10787 * or mismatched type can be passed into a global function. 10788 * In such cases mark the function as unreliable from BTF point of view. 10789 */ 10790 if (err) 10791 prog->aux->func_info_aux[subprog].unreliable = true; 10792 return err; 10793 } 10794 10795 static int push_callback_call(struct bpf_verifier_env *env, struct bpf_insn *insn, 10796 int insn_idx, int subprog, 10797 set_callee_state_fn set_callee_state_cb) 10798 { 10799 struct bpf_verifier_state *state = env->cur_state, *callback_state; 10800 struct bpf_func_state *caller, *callee; 10801 int err; 10802 10803 caller = state->frame[state->curframe]; 10804 err = btf_check_subprog_call(env, subprog, caller->regs); 10805 if (err == -EFAULT) 10806 return err; 10807 10808 /* set_callee_state is used for direct subprog calls, but we are 10809 * interested in validating only BPF helpers that can call subprogs as 10810 * callbacks 10811 */ 10812 env->subprog_info[subprog].is_cb = true; 10813 if (bpf_pseudo_kfunc_call(insn) && 10814 !is_callback_calling_kfunc(insn->imm)) { 10815 verifier_bug(env, "kfunc %s#%d not marked as callback-calling", 10816 func_id_name(insn->imm), insn->imm); 10817 return -EFAULT; 10818 } else if (!bpf_pseudo_kfunc_call(insn) && 10819 !is_callback_calling_function(insn->imm)) { /* helper */ 10820 verifier_bug(env, "helper %s#%d not marked as 
callback-calling", 10821 func_id_name(insn->imm), insn->imm); 10822 return -EFAULT; 10823 } 10824 10825 if (is_async_callback_calling_insn(insn)) { 10826 struct bpf_verifier_state *async_cb; 10827 10828 /* there is no real recursion here. timer and workqueue callbacks are async */ 10829 env->subprog_info[subprog].is_async_cb = true; 10830 async_cb = push_async_cb(env, env->subprog_info[subprog].start, 10831 insn_idx, subprog, 10832 is_async_cb_sleepable(env, insn)); 10833 if (IS_ERR(async_cb)) 10834 return PTR_ERR(async_cb); 10835 callee = async_cb->frame[0]; 10836 callee->async_entry_cnt = caller->async_entry_cnt + 1; 10837 10838 /* Convert bpf_timer_set_callback() args into timer callback args */ 10839 err = set_callee_state_cb(env, caller, callee, insn_idx); 10840 if (err) 10841 return err; 10842 10843 return 0; 10844 } 10845 10846 /* for callback functions enqueue entry to callback and 10847 * proceed with next instruction within current frame. 10848 */ 10849 callback_state = push_stack(env, env->subprog_info[subprog].start, insn_idx, false); 10850 if (IS_ERR(callback_state)) 10851 return PTR_ERR(callback_state); 10852 10853 err = setup_func_entry(env, subprog, insn_idx, set_callee_state_cb, 10854 callback_state); 10855 if (err) 10856 return err; 10857 10858 callback_state->callback_unroll_depth++; 10859 callback_state->frame[callback_state->curframe - 1]->callback_depth++; 10860 caller->callback_depth = 0; 10861 return 0; 10862 } 10863 10864 static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, 10865 int *insn_idx) 10866 { 10867 struct bpf_verifier_state *state = env->cur_state; 10868 struct bpf_func_state *caller; 10869 int err, subprog, target_insn; 10870 10871 target_insn = *insn_idx + insn->imm + 1; 10872 subprog = find_subprog(env, target_insn); 10873 if (verifier_bug_if(subprog < 0, env, "target of func call at insn %d is not a program", 10874 target_insn)) 10875 return -EFAULT; 10876 10877 caller = 
state->frame[state->curframe]; 10878 err = btf_check_subprog_call(env, subprog, caller->regs); 10879 if (err == -EFAULT) 10880 return err; 10881 if (subprog_is_global(env, subprog)) { 10882 const char *sub_name = subprog_name(env, subprog); 10883 10884 if (env->cur_state->active_locks) { 10885 verbose(env, "global function calls are not allowed while holding a lock,\n" 10886 "use static function instead\n"); 10887 return -EINVAL; 10888 } 10889 10890 if (env->subprog_info[subprog].might_sleep && 10891 (env->cur_state->active_rcu_locks || env->cur_state->active_preempt_locks || 10892 env->cur_state->active_irq_id || !in_sleepable(env))) { 10893 verbose(env, "global functions that may sleep are not allowed in non-sleepable context,\n" 10894 "i.e., in a RCU/IRQ/preempt-disabled section, or in\n" 10895 "a non-sleepable BPF program context\n"); 10896 return -EINVAL; 10897 } 10898 10899 if (err) { 10900 verbose(env, "Caller passes invalid args into func#%d ('%s')\n", 10901 subprog, sub_name); 10902 return err; 10903 } 10904 10905 if (env->log.level & BPF_LOG_LEVEL) 10906 verbose(env, "Func#%d ('%s') is global and assumed valid.\n", 10907 subprog, sub_name); 10908 if (env->subprog_info[subprog].changes_pkt_data) 10909 clear_all_pkt_pointers(env); 10910 /* mark global subprog for verifying after main prog */ 10911 subprog_aux(env, subprog)->called = true; 10912 clear_caller_saved_regs(env, caller->regs); 10913 10914 /* All global functions return a 64-bit SCALAR_VALUE */ 10915 mark_reg_unknown(env, caller->regs, BPF_REG_0); 10916 caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG; 10917 10918 /* continue with next insn after call */ 10919 return 0; 10920 } 10921 10922 /* for regular function entry setup new frame and continue 10923 * from that frame. 
10924 */ 10925 err = setup_func_entry(env, subprog, *insn_idx, set_callee_state, state); 10926 if (err) 10927 return err; 10928 10929 clear_caller_saved_regs(env, caller->regs); 10930 10931 /* and go analyze first insn of the callee */ 10932 *insn_idx = env->subprog_info[subprog].start - 1; 10933 10934 bpf_reset_live_stack_callchain(env); 10935 10936 if (env->log.level & BPF_LOG_LEVEL) { 10937 verbose(env, "caller:\n"); 10938 print_verifier_state(env, state, caller->frameno, true); 10939 verbose(env, "callee:\n"); 10940 print_verifier_state(env, state, state->curframe, true); 10941 } 10942 10943 return 0; 10944 } 10945 10946 int map_set_for_each_callback_args(struct bpf_verifier_env *env, 10947 struct bpf_func_state *caller, 10948 struct bpf_func_state *callee) 10949 { 10950 /* bpf_for_each_map_elem(struct bpf_map *map, void *callback_fn, 10951 * void *callback_ctx, u64 flags); 10952 * callback_fn(struct bpf_map *map, void *key, void *value, 10953 * void *callback_ctx); 10954 */ 10955 callee->regs[BPF_REG_1] = caller->regs[BPF_REG_1]; 10956 10957 callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY; 10958 __mark_reg_known_zero(&callee->regs[BPF_REG_2]); 10959 callee->regs[BPF_REG_2].map_ptr = caller->regs[BPF_REG_1].map_ptr; 10960 10961 callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE; 10962 __mark_reg_known_zero(&callee->regs[BPF_REG_3]); 10963 callee->regs[BPF_REG_3].map_ptr = caller->regs[BPF_REG_1].map_ptr; 10964 10965 /* pointer to stack or null */ 10966 callee->regs[BPF_REG_4] = caller->regs[BPF_REG_3]; 10967 10968 /* unused */ 10969 __mark_reg_not_init(env, &callee->regs[BPF_REG_5]); 10970 return 0; 10971 } 10972 10973 static int set_callee_state(struct bpf_verifier_env *env, 10974 struct bpf_func_state *caller, 10975 struct bpf_func_state *callee, int insn_idx) 10976 { 10977 int i; 10978 10979 /* copy r1 - r5 args that callee can access. 
The copy includes parent 10980 * pointers, which connects us up to the liveness chain 10981 */ 10982 for (i = BPF_REG_1; i <= BPF_REG_5; i++) 10983 callee->regs[i] = caller->regs[i]; 10984 return 0; 10985 } 10986 10987 static int set_map_elem_callback_state(struct bpf_verifier_env *env, 10988 struct bpf_func_state *caller, 10989 struct bpf_func_state *callee, 10990 int insn_idx) 10991 { 10992 struct bpf_insn_aux_data *insn_aux = &env->insn_aux_data[insn_idx]; 10993 struct bpf_map *map; 10994 int err; 10995 10996 /* valid map_ptr and poison value does not matter */ 10997 map = insn_aux->map_ptr_state.map_ptr; 10998 if (!map->ops->map_set_for_each_callback_args || 10999 !map->ops->map_for_each_callback) { 11000 verbose(env, "callback function not allowed for map\n"); 11001 return -ENOTSUPP; 11002 } 11003 11004 err = map->ops->map_set_for_each_callback_args(env, caller, callee); 11005 if (err) 11006 return err; 11007 11008 callee->in_callback_fn = true; 11009 callee->callback_ret_range = retval_range(0, 1); 11010 return 0; 11011 } 11012 11013 static int set_loop_callback_state(struct bpf_verifier_env *env, 11014 struct bpf_func_state *caller, 11015 struct bpf_func_state *callee, 11016 int insn_idx) 11017 { 11018 /* bpf_loop(u32 nr_loops, void *callback_fn, void *callback_ctx, 11019 * u64 flags); 11020 * callback_fn(u64 index, void *callback_ctx); 11021 */ 11022 callee->regs[BPF_REG_1].type = SCALAR_VALUE; 11023 callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3]; 11024 11025 /* unused */ 11026 __mark_reg_not_init(env, &callee->regs[BPF_REG_3]); 11027 __mark_reg_not_init(env, &callee->regs[BPF_REG_4]); 11028 __mark_reg_not_init(env, &callee->regs[BPF_REG_5]); 11029 11030 callee->in_callback_fn = true; 11031 callee->callback_ret_range = retval_range(0, 1); 11032 return 0; 11033 } 11034 11035 static int set_timer_callback_state(struct bpf_verifier_env *env, 11036 struct bpf_func_state *caller, 11037 struct bpf_func_state *callee, 11038 int insn_idx) 11039 { 11040 struct 
bpf_map *map_ptr = caller->regs[BPF_REG_1].map_ptr; 11041 11042 /* bpf_timer_set_callback(struct bpf_timer *timer, void *callback_fn); 11043 * callback_fn(struct bpf_map *map, void *key, void *value); 11044 */ 11045 callee->regs[BPF_REG_1].type = CONST_PTR_TO_MAP; 11046 __mark_reg_known_zero(&callee->regs[BPF_REG_1]); 11047 callee->regs[BPF_REG_1].map_ptr = map_ptr; 11048 11049 callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY; 11050 __mark_reg_known_zero(&callee->regs[BPF_REG_2]); 11051 callee->regs[BPF_REG_2].map_ptr = map_ptr; 11052 11053 callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE; 11054 __mark_reg_known_zero(&callee->regs[BPF_REG_3]); 11055 callee->regs[BPF_REG_3].map_ptr = map_ptr; 11056 11057 /* unused */ 11058 __mark_reg_not_init(env, &callee->regs[BPF_REG_4]); 11059 __mark_reg_not_init(env, &callee->regs[BPF_REG_5]); 11060 callee->in_async_callback_fn = true; 11061 callee->callback_ret_range = retval_range(0, 0); 11062 return 0; 11063 } 11064 11065 static int set_find_vma_callback_state(struct bpf_verifier_env *env, 11066 struct bpf_func_state *caller, 11067 struct bpf_func_state *callee, 11068 int insn_idx) 11069 { 11070 /* bpf_find_vma(struct task_struct *task, u64 addr, 11071 * void *callback_fn, void *callback_ctx, u64 flags) 11072 * (callback_fn)(struct task_struct *task, 11073 * struct vm_area_struct *vma, void *callback_ctx); 11074 */ 11075 callee->regs[BPF_REG_1] = caller->regs[BPF_REG_1]; 11076 11077 callee->regs[BPF_REG_2].type = PTR_TO_BTF_ID; 11078 __mark_reg_known_zero(&callee->regs[BPF_REG_2]); 11079 callee->regs[BPF_REG_2].btf = btf_vmlinux; 11080 callee->regs[BPF_REG_2].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_VMA]; 11081 11082 /* pointer to stack or null */ 11083 callee->regs[BPF_REG_3] = caller->regs[BPF_REG_4]; 11084 11085 /* unused */ 11086 __mark_reg_not_init(env, &callee->regs[BPF_REG_4]); 11087 __mark_reg_not_init(env, &callee->regs[BPF_REG_5]); 11088 callee->in_callback_fn = true; 11089 callee->callback_ret_range = retval_range(0, 
1); 11090 return 0; 11091 } 11092 11093 static int set_user_ringbuf_callback_state(struct bpf_verifier_env *env, 11094 struct bpf_func_state *caller, 11095 struct bpf_func_state *callee, 11096 int insn_idx) 11097 { 11098 /* bpf_user_ringbuf_drain(struct bpf_map *map, void *callback_fn, void 11099 * callback_ctx, u64 flags); 11100 * callback_fn(const struct bpf_dynptr_t* dynptr, void *callback_ctx); 11101 */ 11102 __mark_reg_not_init(env, &callee->regs[BPF_REG_0]); 11103 mark_dynptr_cb_reg(env, &callee->regs[BPF_REG_1], BPF_DYNPTR_TYPE_LOCAL); 11104 callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3]; 11105 11106 /* unused */ 11107 __mark_reg_not_init(env, &callee->regs[BPF_REG_3]); 11108 __mark_reg_not_init(env, &callee->regs[BPF_REG_4]); 11109 __mark_reg_not_init(env, &callee->regs[BPF_REG_5]); 11110 11111 callee->in_callback_fn = true; 11112 callee->callback_ret_range = retval_range(0, 1); 11113 return 0; 11114 } 11115 11116 static int set_rbtree_add_callback_state(struct bpf_verifier_env *env, 11117 struct bpf_func_state *caller, 11118 struct bpf_func_state *callee, 11119 int insn_idx) 11120 { 11121 /* void bpf_rbtree_add_impl(struct bpf_rb_root *root, struct bpf_rb_node *node, 11122 * bool (less)(struct bpf_rb_node *a, const struct bpf_rb_node *b)); 11123 * 11124 * 'struct bpf_rb_node *node' arg to bpf_rbtree_add_impl is the same PTR_TO_BTF_ID w/ offset 11125 * that 'less' callback args will be receiving. 
However, 'node' arg was release_reference'd 11126 * by this point, so look at 'root' 11127 */ 11128 struct btf_field *field; 11129 11130 field = reg_find_field_offset(&caller->regs[BPF_REG_1], caller->regs[BPF_REG_1].off, 11131 BPF_RB_ROOT); 11132 if (!field || !field->graph_root.value_btf_id) 11133 return -EFAULT; 11134 11135 mark_reg_graph_node(callee->regs, BPF_REG_1, &field->graph_root); 11136 ref_set_non_owning(env, &callee->regs[BPF_REG_1]); 11137 mark_reg_graph_node(callee->regs, BPF_REG_2, &field->graph_root); 11138 ref_set_non_owning(env, &callee->regs[BPF_REG_2]); 11139 11140 __mark_reg_not_init(env, &callee->regs[BPF_REG_3]); 11141 __mark_reg_not_init(env, &callee->regs[BPF_REG_4]); 11142 __mark_reg_not_init(env, &callee->regs[BPF_REG_5]); 11143 callee->in_callback_fn = true; 11144 callee->callback_ret_range = retval_range(0, 1); 11145 return 0; 11146 } 11147 11148 static int set_task_work_schedule_callback_state(struct bpf_verifier_env *env, 11149 struct bpf_func_state *caller, 11150 struct bpf_func_state *callee, 11151 int insn_idx) 11152 { 11153 struct bpf_map *map_ptr = caller->regs[BPF_REG_3].map_ptr; 11154 11155 /* 11156 * callback_fn(struct bpf_map *map, void *key, void *value); 11157 */ 11158 callee->regs[BPF_REG_1].type = CONST_PTR_TO_MAP; 11159 __mark_reg_known_zero(&callee->regs[BPF_REG_1]); 11160 callee->regs[BPF_REG_1].map_ptr = map_ptr; 11161 11162 callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY; 11163 __mark_reg_known_zero(&callee->regs[BPF_REG_2]); 11164 callee->regs[BPF_REG_2].map_ptr = map_ptr; 11165 11166 callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE; 11167 __mark_reg_known_zero(&callee->regs[BPF_REG_3]); 11168 callee->regs[BPF_REG_3].map_ptr = map_ptr; 11169 11170 /* unused */ 11171 __mark_reg_not_init(env, &callee->regs[BPF_REG_4]); 11172 __mark_reg_not_init(env, &callee->regs[BPF_REG_5]); 11173 callee->in_async_callback_fn = true; 11174 callee->callback_ret_range = retval_range(S32_MIN, S32_MAX); 11175 return 0; 11176 } 11177 11178 
static bool is_rbtree_lock_required_kfunc(u32 btf_id); 11179 11180 /* Are we currently verifying the callback for a rbtree helper that must 11181 * be called with lock held? If so, no need to complain about unreleased 11182 * lock 11183 */ 11184 static bool in_rbtree_lock_required_cb(struct bpf_verifier_env *env) 11185 { 11186 struct bpf_verifier_state *state = env->cur_state; 11187 struct bpf_insn *insn = env->prog->insnsi; 11188 struct bpf_func_state *callee; 11189 int kfunc_btf_id; 11190 11191 if (!state->curframe) 11192 return false; 11193 11194 callee = state->frame[state->curframe]; 11195 11196 if (!callee->in_callback_fn) 11197 return false; 11198 11199 kfunc_btf_id = insn[callee->callsite].imm; 11200 return is_rbtree_lock_required_kfunc(kfunc_btf_id); 11201 } 11202 11203 static bool retval_range_within(struct bpf_retval_range range, const struct bpf_reg_state *reg, 11204 bool return_32bit) 11205 { 11206 if (return_32bit) 11207 return range.minval <= reg->s32_min_value && reg->s32_max_value <= range.maxval; 11208 else 11209 return range.minval <= reg->smin_value && reg->smax_value <= range.maxval; 11210 } 11211 11212 static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) 11213 { 11214 struct bpf_verifier_state *state = env->cur_state, *prev_st; 11215 struct bpf_func_state *caller, *callee; 11216 struct bpf_reg_state *r0; 11217 bool in_callback_fn; 11218 int err; 11219 11220 err = bpf_update_live_stack(env); 11221 if (err) 11222 return err; 11223 11224 callee = state->frame[state->curframe]; 11225 r0 = &callee->regs[BPF_REG_0]; 11226 if (r0->type == PTR_TO_STACK) { 11227 /* technically it's ok to return caller's stack pointer 11228 * (or caller's caller's pointer) back to the caller, 11229 * since these pointers are valid. 
Only current stack 11230 * pointer will be invalid as soon as function exits, 11231 * but let's be conservative 11232 */ 11233 verbose(env, "cannot return stack pointer to the caller\n"); 11234 return -EINVAL; 11235 } 11236 11237 caller = state->frame[state->curframe - 1]; 11238 if (callee->in_callback_fn) { 11239 if (r0->type != SCALAR_VALUE) { 11240 verbose(env, "R0 not a scalar value\n"); 11241 return -EACCES; 11242 } 11243 11244 /* we are going to rely on register's precise value */ 11245 err = mark_chain_precision(env, BPF_REG_0); 11246 if (err) 11247 return err; 11248 11249 /* enforce R0 return value range, and bpf_callback_t returns 64bit */ 11250 if (!retval_range_within(callee->callback_ret_range, r0, false)) { 11251 verbose_invalid_scalar(env, r0, callee->callback_ret_range, 11252 "At callback return", "R0"); 11253 return -EINVAL; 11254 } 11255 if (!bpf_calls_callback(env, callee->callsite)) { 11256 verifier_bug(env, "in callback at %d, callsite %d !calls_callback", 11257 *insn_idx, callee->callsite); 11258 return -EFAULT; 11259 } 11260 } else { 11261 /* return to the caller whatever r0 had in the callee */ 11262 caller->regs[BPF_REG_0] = *r0; 11263 } 11264 11265 /* for callbacks like bpf_loop or bpf_for_each_map_elem go back to callsite, 11266 * there function call logic would reschedule callback visit. If iteration 11267 * converges is_state_visited() would prune that visit eventually. 11268 */ 11269 in_callback_fn = callee->in_callback_fn; 11270 if (in_callback_fn) 11271 *insn_idx = callee->callsite; 11272 else 11273 *insn_idx = callee->callsite + 1; 11274 11275 if (env->log.level & BPF_LOG_LEVEL) { 11276 verbose(env, "returning from callee:\n"); 11277 print_verifier_state(env, state, callee->frameno, true); 11278 verbose(env, "to caller at %d:\n", *insn_idx); 11279 print_verifier_state(env, state, caller->frameno, true); 11280 } 11281 /* clear everything in the callee. 
In case of exceptional exits using 11282 * bpf_throw, this will be done by copy_verifier_state for extra frames. */ 11283 free_func_state(callee); 11284 state->frame[state->curframe--] = NULL; 11285 11286 /* for callbacks widen imprecise scalars to make programs like below verify: 11287 * 11288 * struct ctx { int i; } 11289 * void cb(int idx, struct ctx *ctx) { ctx->i++; ... } 11290 * ... 11291 * struct ctx = { .i = 0; } 11292 * bpf_loop(100, cb, &ctx, 0); 11293 * 11294 * This is similar to what is done in process_iter_next_call() for open 11295 * coded iterators. 11296 */ 11297 prev_st = in_callback_fn ? find_prev_entry(env, state, *insn_idx) : NULL; 11298 if (prev_st) { 11299 err = widen_imprecise_scalars(env, prev_st, state); 11300 if (err) 11301 return err; 11302 } 11303 return 0; 11304 } 11305 11306 static int do_refine_retval_range(struct bpf_verifier_env *env, 11307 struct bpf_reg_state *regs, int ret_type, 11308 int func_id, 11309 struct bpf_call_arg_meta *meta) 11310 { 11311 struct bpf_reg_state *ret_reg = ®s[BPF_REG_0]; 11312 11313 if (ret_type != RET_INTEGER) 11314 return 0; 11315 11316 switch (func_id) { 11317 case BPF_FUNC_get_stack: 11318 case BPF_FUNC_get_task_stack: 11319 case BPF_FUNC_probe_read_str: 11320 case BPF_FUNC_probe_read_kernel_str: 11321 case BPF_FUNC_probe_read_user_str: 11322 ret_reg->smax_value = meta->msize_max_value; 11323 ret_reg->s32_max_value = meta->msize_max_value; 11324 ret_reg->smin_value = -MAX_ERRNO; 11325 ret_reg->s32_min_value = -MAX_ERRNO; 11326 reg_bounds_sync(ret_reg); 11327 break; 11328 case BPF_FUNC_get_smp_processor_id: 11329 ret_reg->umax_value = nr_cpu_ids - 1; 11330 ret_reg->u32_max_value = nr_cpu_ids - 1; 11331 ret_reg->smax_value = nr_cpu_ids - 1; 11332 ret_reg->s32_max_value = nr_cpu_ids - 1; 11333 ret_reg->umin_value = 0; 11334 ret_reg->u32_min_value = 0; 11335 ret_reg->smin_value = 0; 11336 ret_reg->s32_min_value = 0; 11337 reg_bounds_sync(ret_reg); 11338 break; 11339 } 11340 11341 return 
reg_bounds_sanity_check(env, ret_reg, "retval"); 11342 } 11343 11344 static int 11345 record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta, 11346 int func_id, int insn_idx) 11347 { 11348 struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx]; 11349 struct bpf_map *map = meta->map.ptr; 11350 11351 if (func_id != BPF_FUNC_tail_call && 11352 func_id != BPF_FUNC_map_lookup_elem && 11353 func_id != BPF_FUNC_map_update_elem && 11354 func_id != BPF_FUNC_map_delete_elem && 11355 func_id != BPF_FUNC_map_push_elem && 11356 func_id != BPF_FUNC_map_pop_elem && 11357 func_id != BPF_FUNC_map_peek_elem && 11358 func_id != BPF_FUNC_for_each_map_elem && 11359 func_id != BPF_FUNC_redirect_map && 11360 func_id != BPF_FUNC_map_lookup_percpu_elem) 11361 return 0; 11362 11363 if (map == NULL) { 11364 verifier_bug(env, "expected map for helper call"); 11365 return -EFAULT; 11366 } 11367 11368 /* In case of read-only, some additional restrictions 11369 * need to be applied in order to prevent altering the 11370 * state of the map from program side. 
11371 */ 11372 if ((map->map_flags & BPF_F_RDONLY_PROG) && 11373 (func_id == BPF_FUNC_map_delete_elem || 11374 func_id == BPF_FUNC_map_update_elem || 11375 func_id == BPF_FUNC_map_push_elem || 11376 func_id == BPF_FUNC_map_pop_elem)) { 11377 verbose(env, "write into map forbidden\n"); 11378 return -EACCES; 11379 } 11380 11381 if (!aux->map_ptr_state.map_ptr) 11382 bpf_map_ptr_store(aux, meta->map.ptr, 11383 !meta->map.ptr->bypass_spec_v1, false); 11384 else if (aux->map_ptr_state.map_ptr != meta->map.ptr) 11385 bpf_map_ptr_store(aux, meta->map.ptr, 11386 !meta->map.ptr->bypass_spec_v1, true); 11387 return 0; 11388 } 11389 11390 static int 11391 record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta, 11392 int func_id, int insn_idx) 11393 { 11394 struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx]; 11395 struct bpf_reg_state *reg; 11396 struct bpf_map *map = meta->map.ptr; 11397 u64 val, max; 11398 int err; 11399 11400 if (func_id != BPF_FUNC_tail_call) 11401 return 0; 11402 if (!map || map->map_type != BPF_MAP_TYPE_PROG_ARRAY) { 11403 verbose(env, "expected prog array map for tail call"); 11404 return -EINVAL; 11405 } 11406 11407 reg = reg_state(env, BPF_REG_3); 11408 val = reg->var_off.value; 11409 max = map->max_entries; 11410 11411 if (!(is_reg_const(reg, false) && val < max)) { 11412 bpf_map_key_store(aux, BPF_MAP_KEY_POISON); 11413 return 0; 11414 } 11415 11416 err = mark_chain_precision(env, BPF_REG_3); 11417 if (err) 11418 return err; 11419 if (bpf_map_key_unseen(aux)) 11420 bpf_map_key_store(aux, val); 11421 else if (!bpf_map_key_poisoned(aux) && 11422 bpf_map_key_immediate(aux) != val) 11423 bpf_map_key_store(aux, BPF_MAP_KEY_POISON); 11424 return 0; 11425 } 11426 11427 static int check_reference_leak(struct bpf_verifier_env *env, bool exception_exit) 11428 { 11429 struct bpf_verifier_state *state = env->cur_state; 11430 enum bpf_prog_type type = resolve_prog_type(env->prog); 11431 struct bpf_reg_state *reg = reg_state(env, 
BPF_REG_0); 11432 bool refs_lingering = false; 11433 int i; 11434 11435 if (!exception_exit && cur_func(env)->frameno) 11436 return 0; 11437 11438 for (i = 0; i < state->acquired_refs; i++) { 11439 if (state->refs[i].type != REF_TYPE_PTR) 11440 continue; 11441 /* Allow struct_ops programs to return a referenced kptr back to 11442 * kernel. Type checks are performed later in check_return_code. 11443 */ 11444 if (type == BPF_PROG_TYPE_STRUCT_OPS && !exception_exit && 11445 reg->ref_obj_id == state->refs[i].id) 11446 continue; 11447 verbose(env, "Unreleased reference id=%d alloc_insn=%d\n", 11448 state->refs[i].id, state->refs[i].insn_idx); 11449 refs_lingering = true; 11450 } 11451 return refs_lingering ? -EINVAL : 0; 11452 } 11453 11454 static int check_resource_leak(struct bpf_verifier_env *env, bool exception_exit, bool check_lock, const char *prefix) 11455 { 11456 int err; 11457 11458 if (check_lock && env->cur_state->active_locks) { 11459 verbose(env, "%s cannot be used inside bpf_spin_lock-ed region\n", prefix); 11460 return -EINVAL; 11461 } 11462 11463 err = check_reference_leak(env, exception_exit); 11464 if (err) { 11465 verbose(env, "%s would lead to reference leak\n", prefix); 11466 return err; 11467 } 11468 11469 if (check_lock && env->cur_state->active_irq_id) { 11470 verbose(env, "%s cannot be used inside bpf_local_irq_save-ed region\n", prefix); 11471 return -EINVAL; 11472 } 11473 11474 if (check_lock && env->cur_state->active_rcu_locks) { 11475 verbose(env, "%s cannot be used inside bpf_rcu_read_lock-ed region\n", prefix); 11476 return -EINVAL; 11477 } 11478 11479 if (check_lock && env->cur_state->active_preempt_locks) { 11480 verbose(env, "%s cannot be used inside bpf_preempt_disable-ed region\n", prefix); 11481 return -EINVAL; 11482 } 11483 11484 return 0; 11485 } 11486 11487 static int check_bpf_snprintf_call(struct bpf_verifier_env *env, 11488 struct bpf_reg_state *regs) 11489 { 11490 struct bpf_reg_state *fmt_reg = ®s[BPF_REG_3]; 11491 struct 
bpf_reg_state *data_len_reg = ®s[BPF_REG_5]; 11492 struct bpf_map *fmt_map = fmt_reg->map_ptr; 11493 struct bpf_bprintf_data data = {}; 11494 int err, fmt_map_off, num_args; 11495 u64 fmt_addr; 11496 char *fmt; 11497 11498 /* data must be an array of u64 */ 11499 if (data_len_reg->var_off.value % 8) 11500 return -EINVAL; 11501 num_args = data_len_reg->var_off.value / 8; 11502 11503 /* fmt being ARG_PTR_TO_CONST_STR guarantees that var_off is const 11504 * and map_direct_value_addr is set. 11505 */ 11506 fmt_map_off = fmt_reg->off + fmt_reg->var_off.value; 11507 err = fmt_map->ops->map_direct_value_addr(fmt_map, &fmt_addr, 11508 fmt_map_off); 11509 if (err) { 11510 verbose(env, "failed to retrieve map value address\n"); 11511 return -EFAULT; 11512 } 11513 fmt = (char *)(long)fmt_addr + fmt_map_off; 11514 11515 /* We are also guaranteed that fmt+fmt_map_off is NULL terminated, we 11516 * can focus on validating the format specifiers. 11517 */ 11518 err = bpf_bprintf_prepare(fmt, UINT_MAX, NULL, num_args, &data); 11519 if (err < 0) 11520 verbose(env, "Invalid format string\n"); 11521 11522 return err; 11523 } 11524 11525 static int check_get_func_ip(struct bpf_verifier_env *env) 11526 { 11527 enum bpf_prog_type type = resolve_prog_type(env->prog); 11528 int func_id = BPF_FUNC_get_func_ip; 11529 11530 if (type == BPF_PROG_TYPE_TRACING) { 11531 if (!bpf_prog_has_trampoline(env->prog)) { 11532 verbose(env, "func %s#%d supported only for fentry/fexit/fmod_ret programs\n", 11533 func_id_name(func_id), func_id); 11534 return -ENOTSUPP; 11535 } 11536 return 0; 11537 } else if (type == BPF_PROG_TYPE_KPROBE) { 11538 return 0; 11539 } 11540 11541 verbose(env, "func %s#%d not supported for program type %d\n", 11542 func_id_name(func_id), func_id, type); 11543 return -ENOTSUPP; 11544 } 11545 11546 static struct bpf_insn_aux_data *cur_aux(const struct bpf_verifier_env *env) 11547 { 11548 return &env->insn_aux_data[env->insn_idx]; 11549 } 11550 11551 static bool 
loop_flag_is_zero(struct bpf_verifier_env *env) 11552 { 11553 struct bpf_reg_state *reg = reg_state(env, BPF_REG_4); 11554 bool reg_is_null = register_is_null(reg); 11555 11556 if (reg_is_null) 11557 mark_chain_precision(env, BPF_REG_4); 11558 11559 return reg_is_null; 11560 } 11561 11562 static void update_loop_inline_state(struct bpf_verifier_env *env, u32 subprogno) 11563 { 11564 struct bpf_loop_inline_state *state = &cur_aux(env)->loop_inline_state; 11565 11566 if (!state->initialized) { 11567 state->initialized = 1; 11568 state->fit_for_inline = loop_flag_is_zero(env); 11569 state->callback_subprogno = subprogno; 11570 return; 11571 } 11572 11573 if (!state->fit_for_inline) 11574 return; 11575 11576 state->fit_for_inline = (loop_flag_is_zero(env) && 11577 state->callback_subprogno == subprogno); 11578 } 11579 11580 /* Returns whether or not the given map type can potentially elide 11581 * lookup return value nullness check. This is possible if the key 11582 * is statically known. 11583 */ 11584 static bool can_elide_value_nullness(enum bpf_map_type type) 11585 { 11586 switch (type) { 11587 case BPF_MAP_TYPE_ARRAY: 11588 case BPF_MAP_TYPE_PERCPU_ARRAY: 11589 return true; 11590 default: 11591 return false; 11592 } 11593 } 11594 11595 static int get_helper_proto(struct bpf_verifier_env *env, int func_id, 11596 const struct bpf_func_proto **ptr) 11597 { 11598 if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) 11599 return -ERANGE; 11600 11601 if (!env->ops->get_func_proto) 11602 return -EINVAL; 11603 11604 *ptr = env->ops->get_func_proto(func_id, env->prog); 11605 return *ptr && (*ptr)->func ? 0 : -EINVAL; 11606 } 11607 11608 /* Check if we're in a sleepable context. 
*/ 11609 static inline bool in_sleepable_context(struct bpf_verifier_env *env) 11610 { 11611 return !env->cur_state->active_rcu_locks && 11612 !env->cur_state->active_preempt_locks && 11613 !env->cur_state->active_locks && 11614 !env->cur_state->active_irq_id && 11615 in_sleepable(env); 11616 } 11617 11618 static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn, 11619 int *insn_idx_p) 11620 { 11621 enum bpf_prog_type prog_type = resolve_prog_type(env->prog); 11622 bool returns_cpu_specific_alloc_ptr = false; 11623 const struct bpf_func_proto *fn = NULL; 11624 enum bpf_return_type ret_type; 11625 enum bpf_type_flag ret_flag; 11626 struct bpf_reg_state *regs; 11627 struct bpf_call_arg_meta meta; 11628 int insn_idx = *insn_idx_p; 11629 bool changes_data; 11630 int i, err, func_id; 11631 11632 /* find function prototype */ 11633 func_id = insn->imm; 11634 err = get_helper_proto(env, insn->imm, &fn); 11635 if (err == -ERANGE) { 11636 verbose(env, "invalid func %s#%d\n", func_id_name(func_id), func_id); 11637 return -EINVAL; 11638 } 11639 11640 if (err) { 11641 verbose(env, "program of this type cannot use helper %s#%d\n", 11642 func_id_name(func_id), func_id); 11643 return err; 11644 } 11645 11646 /* eBPF programs must be GPL compatible to use GPL-ed functions */ 11647 if (!env->prog->gpl_compatible && fn->gpl_only) { 11648 verbose(env, "cannot call GPL-restricted function from non-GPL compatible program\n"); 11649 return -EINVAL; 11650 } 11651 11652 if (fn->allowed && !fn->allowed(env->prog)) { 11653 verbose(env, "helper call is not allowed in probe\n"); 11654 return -EINVAL; 11655 } 11656 11657 if (!in_sleepable(env) && fn->might_sleep) { 11658 verbose(env, "helper call might sleep in a non-sleepable prog\n"); 11659 return -EINVAL; 11660 } 11661 11662 /* With LD_ABS/IND some JITs save/restore skb from r1. 
*/ 11663 changes_data = bpf_helper_changes_pkt_data(func_id); 11664 if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) { 11665 verifier_bug(env, "func %s#%d: r1 != ctx", func_id_name(func_id), func_id); 11666 return -EFAULT; 11667 } 11668 11669 memset(&meta, 0, sizeof(meta)); 11670 meta.pkt_access = fn->pkt_access; 11671 11672 err = check_func_proto(fn); 11673 if (err) { 11674 verifier_bug(env, "incorrect func proto %s#%d", func_id_name(func_id), func_id); 11675 return err; 11676 } 11677 11678 if (env->cur_state->active_rcu_locks) { 11679 if (fn->might_sleep) { 11680 verbose(env, "sleepable helper %s#%d in rcu_read_lock region\n", 11681 func_id_name(func_id), func_id); 11682 return -EINVAL; 11683 } 11684 } 11685 11686 if (env->cur_state->active_preempt_locks) { 11687 if (fn->might_sleep) { 11688 verbose(env, "sleepable helper %s#%d in non-preemptible region\n", 11689 func_id_name(func_id), func_id); 11690 return -EINVAL; 11691 } 11692 } 11693 11694 if (env->cur_state->active_irq_id) { 11695 if (fn->might_sleep) { 11696 verbose(env, "sleepable helper %s#%d in IRQ-disabled region\n", 11697 func_id_name(func_id), func_id); 11698 return -EINVAL; 11699 } 11700 } 11701 11702 /* Track non-sleepable context for helpers. */ 11703 if (!in_sleepable_context(env)) 11704 env->insn_aux_data[insn_idx].non_sleepable = true; 11705 11706 meta.func_id = func_id; 11707 /* check args */ 11708 for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) { 11709 err = check_func_arg(env, i, &meta, fn, insn_idx); 11710 if (err) 11711 return err; 11712 } 11713 11714 err = record_func_map(env, &meta, func_id, insn_idx); 11715 if (err) 11716 return err; 11717 11718 err = record_func_key(env, &meta, func_id, insn_idx); 11719 if (err) 11720 return err; 11721 11722 /* Mark slots with STACK_MISC in case of raw mode, stack offset 11723 * is inferred from register state. 
11724 */ 11725 for (i = 0; i < meta.access_size; i++) { 11726 err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B, 11727 BPF_WRITE, -1, false, false); 11728 if (err) 11729 return err; 11730 } 11731 11732 regs = cur_regs(env); 11733 11734 if (meta.release_regno) { 11735 err = -EINVAL; 11736 if (arg_type_is_dynptr(fn->arg_type[meta.release_regno - BPF_REG_1])) { 11737 err = unmark_stack_slots_dynptr(env, ®s[meta.release_regno]); 11738 } else if (func_id == BPF_FUNC_kptr_xchg && meta.ref_obj_id) { 11739 u32 ref_obj_id = meta.ref_obj_id; 11740 bool in_rcu = in_rcu_cs(env); 11741 struct bpf_func_state *state; 11742 struct bpf_reg_state *reg; 11743 11744 err = release_reference_nomark(env->cur_state, ref_obj_id); 11745 if (!err) { 11746 bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({ 11747 if (reg->ref_obj_id == ref_obj_id) { 11748 if (in_rcu && (reg->type & MEM_ALLOC) && (reg->type & MEM_PERCPU)) { 11749 reg->ref_obj_id = 0; 11750 reg->type &= ~MEM_ALLOC; 11751 reg->type |= MEM_RCU; 11752 } else { 11753 mark_reg_invalid(env, reg); 11754 } 11755 } 11756 })); 11757 } 11758 } else if (meta.ref_obj_id) { 11759 err = release_reference(env, meta.ref_obj_id); 11760 } else if (register_is_null(®s[meta.release_regno])) { 11761 /* meta.ref_obj_id can only be 0 if register that is meant to be 11762 * released is NULL, which must be > R0. 11763 */ 11764 err = 0; 11765 } 11766 if (err) { 11767 verbose(env, "func %s#%d reference has not been acquired before\n", 11768 func_id_name(func_id), func_id); 11769 return err; 11770 } 11771 } 11772 11773 switch (func_id) { 11774 case BPF_FUNC_tail_call: 11775 err = check_resource_leak(env, false, true, "tail_call"); 11776 if (err) 11777 return err; 11778 break; 11779 case BPF_FUNC_get_local_storage: 11780 /* check that flags argument in get_local_storage(map, flags) is 0, 11781 * this is required because get_local_storage() can't return an error. 
11782 */ 11783 if (!register_is_null(®s[BPF_REG_2])) { 11784 verbose(env, "get_local_storage() doesn't support non-zero flags\n"); 11785 return -EINVAL; 11786 } 11787 break; 11788 case BPF_FUNC_for_each_map_elem: 11789 err = push_callback_call(env, insn, insn_idx, meta.subprogno, 11790 set_map_elem_callback_state); 11791 break; 11792 case BPF_FUNC_timer_set_callback: 11793 err = push_callback_call(env, insn, insn_idx, meta.subprogno, 11794 set_timer_callback_state); 11795 break; 11796 case BPF_FUNC_find_vma: 11797 err = push_callback_call(env, insn, insn_idx, meta.subprogno, 11798 set_find_vma_callback_state); 11799 break; 11800 case BPF_FUNC_snprintf: 11801 err = check_bpf_snprintf_call(env, regs); 11802 break; 11803 case BPF_FUNC_loop: 11804 update_loop_inline_state(env, meta.subprogno); 11805 /* Verifier relies on R1 value to determine if bpf_loop() iteration 11806 * is finished, thus mark it precise. 11807 */ 11808 err = mark_chain_precision(env, BPF_REG_1); 11809 if (err) 11810 return err; 11811 if (cur_func(env)->callback_depth < regs[BPF_REG_1].umax_value) { 11812 err = push_callback_call(env, insn, insn_idx, meta.subprogno, 11813 set_loop_callback_state); 11814 } else { 11815 cur_func(env)->callback_depth = 0; 11816 if (env->log.level & BPF_LOG_LEVEL2) 11817 verbose(env, "frame%d bpf_loop iteration limit reached\n", 11818 env->cur_state->curframe); 11819 } 11820 break; 11821 case BPF_FUNC_dynptr_from_mem: 11822 if (regs[BPF_REG_1].type != PTR_TO_MAP_VALUE) { 11823 verbose(env, "Unsupported reg type %s for bpf_dynptr_from_mem data\n", 11824 reg_type_str(env, regs[BPF_REG_1].type)); 11825 return -EACCES; 11826 } 11827 break; 11828 case BPF_FUNC_set_retval: 11829 if (prog_type == BPF_PROG_TYPE_LSM && 11830 env->prog->expected_attach_type == BPF_LSM_CGROUP) { 11831 if (!env->prog->aux->attach_func_proto->type) { 11832 /* Make sure programs that attach to void 11833 * hooks don't try to modify return value. 
11834 */ 11835 verbose(env, "BPF_LSM_CGROUP that attach to void LSM hooks can't modify return value!\n"); 11836 return -EINVAL; 11837 } 11838 } 11839 break; 11840 case BPF_FUNC_dynptr_data: 11841 { 11842 struct bpf_reg_state *reg; 11843 int id, ref_obj_id; 11844 11845 reg = get_dynptr_arg_reg(env, fn, regs); 11846 if (!reg) 11847 return -EFAULT; 11848 11849 11850 if (meta.dynptr_id) { 11851 verifier_bug(env, "meta.dynptr_id already set"); 11852 return -EFAULT; 11853 } 11854 if (meta.ref_obj_id) { 11855 verifier_bug(env, "meta.ref_obj_id already set"); 11856 return -EFAULT; 11857 } 11858 11859 id = dynptr_id(env, reg); 11860 if (id < 0) { 11861 verifier_bug(env, "failed to obtain dynptr id"); 11862 return id; 11863 } 11864 11865 ref_obj_id = dynptr_ref_obj_id(env, reg); 11866 if (ref_obj_id < 0) { 11867 verifier_bug(env, "failed to obtain dynptr ref_obj_id"); 11868 return ref_obj_id; 11869 } 11870 11871 meta.dynptr_id = id; 11872 meta.ref_obj_id = ref_obj_id; 11873 11874 break; 11875 } 11876 case BPF_FUNC_dynptr_write: 11877 { 11878 enum bpf_dynptr_type dynptr_type; 11879 struct bpf_reg_state *reg; 11880 11881 reg = get_dynptr_arg_reg(env, fn, regs); 11882 if (!reg) 11883 return -EFAULT; 11884 11885 dynptr_type = dynptr_get_type(env, reg); 11886 if (dynptr_type == BPF_DYNPTR_TYPE_INVALID) 11887 return -EFAULT; 11888 11889 if (dynptr_type == BPF_DYNPTR_TYPE_SKB || 11890 dynptr_type == BPF_DYNPTR_TYPE_SKB_META) 11891 /* this will trigger clear_all_pkt_pointers(), which will 11892 * invalidate all dynptr slices associated with the skb 11893 */ 11894 changes_data = true; 11895 11896 break; 11897 } 11898 case BPF_FUNC_per_cpu_ptr: 11899 case BPF_FUNC_this_cpu_ptr: 11900 { 11901 struct bpf_reg_state *reg = ®s[BPF_REG_1]; 11902 const struct btf_type *type; 11903 11904 if (reg->type & MEM_RCU) { 11905 type = btf_type_by_id(reg->btf, reg->btf_id); 11906 if (!type || !btf_type_is_struct(type)) { 11907 verbose(env, "Helper has invalid btf/btf_id in R1\n"); 11908 return 
-EFAULT; 11909 } 11910 returns_cpu_specific_alloc_ptr = true; 11911 env->insn_aux_data[insn_idx].call_with_percpu_alloc_ptr = true; 11912 } 11913 break; 11914 } 11915 case BPF_FUNC_user_ringbuf_drain: 11916 err = push_callback_call(env, insn, insn_idx, meta.subprogno, 11917 set_user_ringbuf_callback_state); 11918 break; 11919 } 11920 11921 if (err) 11922 return err; 11923 11924 /* reset caller saved regs */ 11925 for (i = 0; i < CALLER_SAVED_REGS; i++) { 11926 mark_reg_not_init(env, regs, caller_saved[i]); 11927 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK); 11928 } 11929 11930 /* helper call returns 64-bit value. */ 11931 regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG; 11932 11933 /* update return register (already marked as written above) */ 11934 ret_type = fn->ret_type; 11935 ret_flag = type_flag(ret_type); 11936 11937 switch (base_type(ret_type)) { 11938 case RET_INTEGER: 11939 /* sets type to SCALAR_VALUE */ 11940 mark_reg_unknown(env, regs, BPF_REG_0); 11941 break; 11942 case RET_VOID: 11943 regs[BPF_REG_0].type = NOT_INIT; 11944 break; 11945 case RET_PTR_TO_MAP_VALUE: 11946 /* There is no offset yet applied, variable or fixed */ 11947 mark_reg_known_zero(env, regs, BPF_REG_0); 11948 /* remember map_ptr, so that check_map_access() 11949 * can check 'value_size' boundary of memory access 11950 * to map element returned from bpf_map_lookup_elem() 11951 */ 11952 if (meta.map.ptr == NULL) { 11953 verifier_bug(env, "unexpected null map_ptr"); 11954 return -EFAULT; 11955 } 11956 11957 if (func_id == BPF_FUNC_map_lookup_elem && 11958 can_elide_value_nullness(meta.map.ptr->map_type) && 11959 meta.const_map_key >= 0 && 11960 meta.const_map_key < meta.map.ptr->max_entries) 11961 ret_flag &= ~PTR_MAYBE_NULL; 11962 11963 regs[BPF_REG_0].map_ptr = meta.map.ptr; 11964 regs[BPF_REG_0].map_uid = meta.map.uid; 11965 regs[BPF_REG_0].type = PTR_TO_MAP_VALUE | ret_flag; 11966 if (!type_may_be_null(ret_flag) && 11967 btf_record_has_field(meta.map.ptr->record, BPF_SPIN_LOCK | 
BPF_RES_SPIN_LOCK)) { 11968 regs[BPF_REG_0].id = ++env->id_gen; 11969 } 11970 break; 11971 case RET_PTR_TO_SOCKET: 11972 mark_reg_known_zero(env, regs, BPF_REG_0); 11973 regs[BPF_REG_0].type = PTR_TO_SOCKET | ret_flag; 11974 break; 11975 case RET_PTR_TO_SOCK_COMMON: 11976 mark_reg_known_zero(env, regs, BPF_REG_0); 11977 regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON | ret_flag; 11978 break; 11979 case RET_PTR_TO_TCP_SOCK: 11980 mark_reg_known_zero(env, regs, BPF_REG_0); 11981 regs[BPF_REG_0].type = PTR_TO_TCP_SOCK | ret_flag; 11982 break; 11983 case RET_PTR_TO_MEM: 11984 mark_reg_known_zero(env, regs, BPF_REG_0); 11985 regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag; 11986 regs[BPF_REG_0].mem_size = meta.mem_size; 11987 break; 11988 case RET_PTR_TO_MEM_OR_BTF_ID: 11989 { 11990 const struct btf_type *t; 11991 11992 mark_reg_known_zero(env, regs, BPF_REG_0); 11993 t = btf_type_skip_modifiers(meta.ret_btf, meta.ret_btf_id, NULL); 11994 if (!btf_type_is_struct(t)) { 11995 u32 tsize; 11996 const struct btf_type *ret; 11997 const char *tname; 11998 11999 /* resolve the type size of ksym. */ 12000 ret = btf_resolve_size(meta.ret_btf, t, &tsize); 12001 if (IS_ERR(ret)) { 12002 tname = btf_name_by_offset(meta.ret_btf, t->name_off); 12003 verbose(env, "unable to resolve the size of type '%s': %ld\n", 12004 tname, PTR_ERR(ret)); 12005 return -EINVAL; 12006 } 12007 regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag; 12008 regs[BPF_REG_0].mem_size = tsize; 12009 } else { 12010 if (returns_cpu_specific_alloc_ptr) { 12011 regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC | MEM_RCU; 12012 } else { 12013 /* MEM_RDONLY may be carried from ret_flag, but it 12014 * doesn't apply on PTR_TO_BTF_ID. Fold it, otherwise 12015 * it will confuse the check of PTR_TO_BTF_ID in 12016 * check_mem_access(). 
12017 */ 12018 ret_flag &= ~MEM_RDONLY; 12019 regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag; 12020 } 12021 12022 regs[BPF_REG_0].btf = meta.ret_btf; 12023 regs[BPF_REG_0].btf_id = meta.ret_btf_id; 12024 } 12025 break; 12026 } 12027 case RET_PTR_TO_BTF_ID: 12028 { 12029 struct btf *ret_btf; 12030 int ret_btf_id; 12031 12032 mark_reg_known_zero(env, regs, BPF_REG_0); 12033 regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag; 12034 if (func_id == BPF_FUNC_kptr_xchg) { 12035 ret_btf = meta.kptr_field->kptr.btf; 12036 ret_btf_id = meta.kptr_field->kptr.btf_id; 12037 if (!btf_is_kernel(ret_btf)) { 12038 regs[BPF_REG_0].type |= MEM_ALLOC; 12039 if (meta.kptr_field->type == BPF_KPTR_PERCPU) 12040 regs[BPF_REG_0].type |= MEM_PERCPU; 12041 } 12042 } else { 12043 if (fn->ret_btf_id == BPF_PTR_POISON) { 12044 verifier_bug(env, "func %s has non-overwritten BPF_PTR_POISON return type", 12045 func_id_name(func_id)); 12046 return -EFAULT; 12047 } 12048 ret_btf = btf_vmlinux; 12049 ret_btf_id = *fn->ret_btf_id; 12050 } 12051 if (ret_btf_id == 0) { 12052 verbose(env, "invalid return type %u of func %s#%d\n", 12053 base_type(ret_type), func_id_name(func_id), 12054 func_id); 12055 return -EINVAL; 12056 } 12057 regs[BPF_REG_0].btf = ret_btf; 12058 regs[BPF_REG_0].btf_id = ret_btf_id; 12059 break; 12060 } 12061 default: 12062 verbose(env, "unknown return type %u of func %s#%d\n", 12063 base_type(ret_type), func_id_name(func_id), func_id); 12064 return -EINVAL; 12065 } 12066 12067 if (type_may_be_null(regs[BPF_REG_0].type)) 12068 regs[BPF_REG_0].id = ++env->id_gen; 12069 12070 if (helper_multiple_ref_obj_use(func_id, meta.map.ptr)) { 12071 verifier_bug(env, "func %s#%d sets ref_obj_id more than once", 12072 func_id_name(func_id), func_id); 12073 return -EFAULT; 12074 } 12075 12076 if (is_dynptr_ref_function(func_id)) 12077 regs[BPF_REG_0].dynptr_id = meta.dynptr_id; 12078 12079 if (is_ptr_cast_function(func_id) || is_dynptr_ref_function(func_id)) { 12080 /* For release_reference() */ 12081 
regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id; 12082 } else if (is_acquire_function(func_id, meta.map.ptr)) { 12083 int id = acquire_reference(env, insn_idx); 12084 12085 if (id < 0) 12086 return id; 12087 /* For mark_ptr_or_null_reg() */ 12088 regs[BPF_REG_0].id = id; 12089 /* For release_reference() */ 12090 regs[BPF_REG_0].ref_obj_id = id; 12091 } 12092 12093 err = do_refine_retval_range(env, regs, fn->ret_type, func_id, &meta); 12094 if (err) 12095 return err; 12096 12097 err = check_map_func_compatibility(env, meta.map.ptr, func_id); 12098 if (err) 12099 return err; 12100 12101 if ((func_id == BPF_FUNC_get_stack || 12102 func_id == BPF_FUNC_get_task_stack) && 12103 !env->prog->has_callchain_buf) { 12104 const char *err_str; 12105 12106 #ifdef CONFIG_PERF_EVENTS 12107 err = get_callchain_buffers(sysctl_perf_event_max_stack); 12108 err_str = "cannot get callchain buffer for func %s#%d\n"; 12109 #else 12110 err = -ENOTSUPP; 12111 err_str = "func %s#%d not supported without CONFIG_PERF_EVENTS\n"; 12112 #endif 12113 if (err) { 12114 verbose(env, err_str, func_id_name(func_id), func_id); 12115 return err; 12116 } 12117 12118 env->prog->has_callchain_buf = true; 12119 } 12120 12121 if (func_id == BPF_FUNC_get_stackid || func_id == BPF_FUNC_get_stack) 12122 env->prog->call_get_stack = true; 12123 12124 if (func_id == BPF_FUNC_get_func_ip) { 12125 if (check_get_func_ip(env)) 12126 return -ENOTSUPP; 12127 env->prog->call_get_func_ip = true; 12128 } 12129 12130 if (func_id == BPF_FUNC_tail_call) { 12131 if (env->cur_state->curframe) { 12132 struct bpf_verifier_state *branch; 12133 12134 mark_reg_scratched(env, BPF_REG_0); 12135 branch = push_stack(env, env->insn_idx + 1, env->insn_idx, false); 12136 if (IS_ERR(branch)) 12137 return PTR_ERR(branch); 12138 clear_all_pkt_pointers(env); 12139 mark_reg_unknown(env, regs, BPF_REG_0); 12140 err = prepare_func_exit(env, &env->insn_idx); 12141 if (err) 12142 return err; 12143 env->insn_idx--; 12144 } else { 12145 changes_data = 
false; 12146 } 12147 } 12148 12149 if (changes_data) 12150 clear_all_pkt_pointers(env); 12151 return 0; 12152 } 12153 12154 /* mark_btf_func_reg_size() is used when the reg size is determined by 12155 * the BTF func_proto's return value size and argument. 12156 */ 12157 static void __mark_btf_func_reg_size(struct bpf_verifier_env *env, struct bpf_reg_state *regs, 12158 u32 regno, size_t reg_size) 12159 { 12160 struct bpf_reg_state *reg = ®s[regno]; 12161 12162 if (regno == BPF_REG_0) { 12163 /* Function return value */ 12164 reg->subreg_def = reg_size == sizeof(u64) ? 12165 DEF_NOT_SUBREG : env->insn_idx + 1; 12166 } else if (reg_size == sizeof(u64)) { 12167 /* Function argument */ 12168 mark_insn_zext(env, reg); 12169 } 12170 } 12171 12172 static void mark_btf_func_reg_size(struct bpf_verifier_env *env, u32 regno, 12173 size_t reg_size) 12174 { 12175 return __mark_btf_func_reg_size(env, cur_regs(env), regno, reg_size); 12176 } 12177 12178 static bool is_kfunc_acquire(struct bpf_kfunc_call_arg_meta *meta) 12179 { 12180 return meta->kfunc_flags & KF_ACQUIRE; 12181 } 12182 12183 static bool is_kfunc_release(struct bpf_kfunc_call_arg_meta *meta) 12184 { 12185 return meta->kfunc_flags & KF_RELEASE; 12186 } 12187 12188 static bool is_kfunc_sleepable(struct bpf_kfunc_call_arg_meta *meta) 12189 { 12190 return meta->kfunc_flags & KF_SLEEPABLE; 12191 } 12192 12193 static bool is_kfunc_destructive(struct bpf_kfunc_call_arg_meta *meta) 12194 { 12195 return meta->kfunc_flags & KF_DESTRUCTIVE; 12196 } 12197 12198 static bool is_kfunc_rcu(struct bpf_kfunc_call_arg_meta *meta) 12199 { 12200 return meta->kfunc_flags & KF_RCU; 12201 } 12202 12203 static bool is_kfunc_rcu_protected(struct bpf_kfunc_call_arg_meta *meta) 12204 { 12205 return meta->kfunc_flags & KF_RCU_PROTECTED; 12206 } 12207 12208 static bool is_kfunc_arg_mem_size(const struct btf *btf, 12209 const struct btf_param *arg, 12210 const struct bpf_reg_state *reg) 12211 { 12212 const struct btf_type *t; 12213 12214 t = 
btf_type_skip_modifiers(btf, arg->type, NULL); 12215 if (!btf_type_is_scalar(t) || reg->type != SCALAR_VALUE) 12216 return false; 12217 12218 return btf_param_match_suffix(btf, arg, "__sz"); 12219 } 12220 12221 static bool is_kfunc_arg_const_mem_size(const struct btf *btf, 12222 const struct btf_param *arg, 12223 const struct bpf_reg_state *reg) 12224 { 12225 const struct btf_type *t; 12226 12227 t = btf_type_skip_modifiers(btf, arg->type, NULL); 12228 if (!btf_type_is_scalar(t) || reg->type != SCALAR_VALUE) 12229 return false; 12230 12231 return btf_param_match_suffix(btf, arg, "__szk"); 12232 } 12233 12234 static bool is_kfunc_arg_constant(const struct btf *btf, const struct btf_param *arg) 12235 { 12236 return btf_param_match_suffix(btf, arg, "__k"); 12237 } 12238 12239 static bool is_kfunc_arg_ignore(const struct btf *btf, const struct btf_param *arg) 12240 { 12241 return btf_param_match_suffix(btf, arg, "__ign"); 12242 } 12243 12244 static bool is_kfunc_arg_map(const struct btf *btf, const struct btf_param *arg) 12245 { 12246 return btf_param_match_suffix(btf, arg, "__map"); 12247 } 12248 12249 static bool is_kfunc_arg_alloc_obj(const struct btf *btf, const struct btf_param *arg) 12250 { 12251 return btf_param_match_suffix(btf, arg, "__alloc"); 12252 } 12253 12254 static bool is_kfunc_arg_uninit(const struct btf *btf, const struct btf_param *arg) 12255 { 12256 return btf_param_match_suffix(btf, arg, "__uninit"); 12257 } 12258 12259 static bool is_kfunc_arg_refcounted_kptr(const struct btf *btf, const struct btf_param *arg) 12260 { 12261 return btf_param_match_suffix(btf, arg, "__refcounted_kptr"); 12262 } 12263 12264 static bool is_kfunc_arg_nullable(const struct btf *btf, const struct btf_param *arg) 12265 { 12266 return btf_param_match_suffix(btf, arg, "__nullable"); 12267 } 12268 12269 static bool is_kfunc_arg_const_str(const struct btf *btf, const struct btf_param *arg) 12270 { 12271 return btf_param_match_suffix(btf, arg, "__str"); 12272 } 12273 12274 
static bool is_kfunc_arg_irq_flag(const struct btf *btf, const struct btf_param *arg) 12275 { 12276 return btf_param_match_suffix(btf, arg, "__irq_flag"); 12277 } 12278 12279 static bool is_kfunc_arg_scalar_with_name(const struct btf *btf, 12280 const struct btf_param *arg, 12281 const char *name) 12282 { 12283 int len, target_len = strlen(name); 12284 const char *param_name; 12285 12286 param_name = btf_name_by_offset(btf, arg->name_off); 12287 if (str_is_empty(param_name)) 12288 return false; 12289 len = strlen(param_name); 12290 if (len != target_len) 12291 return false; 12292 if (strcmp(param_name, name)) 12293 return false; 12294 12295 return true; 12296 } 12297 12298 enum { 12299 KF_ARG_DYNPTR_ID, 12300 KF_ARG_LIST_HEAD_ID, 12301 KF_ARG_LIST_NODE_ID, 12302 KF_ARG_RB_ROOT_ID, 12303 KF_ARG_RB_NODE_ID, 12304 KF_ARG_WORKQUEUE_ID, 12305 KF_ARG_RES_SPIN_LOCK_ID, 12306 KF_ARG_TASK_WORK_ID, 12307 KF_ARG_PROG_AUX_ID, 12308 KF_ARG_TIMER_ID 12309 }; 12310 12311 BTF_ID_LIST(kf_arg_btf_ids) 12312 BTF_ID(struct, bpf_dynptr) 12313 BTF_ID(struct, bpf_list_head) 12314 BTF_ID(struct, bpf_list_node) 12315 BTF_ID(struct, bpf_rb_root) 12316 BTF_ID(struct, bpf_rb_node) 12317 BTF_ID(struct, bpf_wq) 12318 BTF_ID(struct, bpf_res_spin_lock) 12319 BTF_ID(struct, bpf_task_work) 12320 BTF_ID(struct, bpf_prog_aux) 12321 BTF_ID(struct, bpf_timer) 12322 12323 static bool __is_kfunc_ptr_arg_type(const struct btf *btf, 12324 const struct btf_param *arg, int type) 12325 { 12326 const struct btf_type *t; 12327 u32 res_id; 12328 12329 t = btf_type_skip_modifiers(btf, arg->type, NULL); 12330 if (!t) 12331 return false; 12332 if (!btf_type_is_ptr(t)) 12333 return false; 12334 t = btf_type_skip_modifiers(btf, t->type, &res_id); 12335 if (!t) 12336 return false; 12337 return btf_types_are_same(btf, res_id, btf_vmlinux, kf_arg_btf_ids[type]); 12338 } 12339 12340 static bool is_kfunc_arg_dynptr(const struct btf *btf, const struct btf_param *arg) 12341 { 12342 return __is_kfunc_ptr_arg_type(btf, arg, 
KF_ARG_DYNPTR_ID); 12343 } 12344 12345 static bool is_kfunc_arg_list_head(const struct btf *btf, const struct btf_param *arg) 12346 { 12347 return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_LIST_HEAD_ID); 12348 } 12349 12350 static bool is_kfunc_arg_list_node(const struct btf *btf, const struct btf_param *arg) 12351 { 12352 return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_LIST_NODE_ID); 12353 } 12354 12355 static bool is_kfunc_arg_rbtree_root(const struct btf *btf, const struct btf_param *arg) 12356 { 12357 return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_RB_ROOT_ID); 12358 } 12359 12360 static bool is_kfunc_arg_rbtree_node(const struct btf *btf, const struct btf_param *arg) 12361 { 12362 return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_RB_NODE_ID); 12363 } 12364 12365 static bool is_kfunc_arg_timer(const struct btf *btf, const struct btf_param *arg) 12366 { 12367 return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_TIMER_ID); 12368 } 12369 12370 static bool is_kfunc_arg_wq(const struct btf *btf, const struct btf_param *arg) 12371 { 12372 return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_WORKQUEUE_ID); 12373 } 12374 12375 static bool is_kfunc_arg_task_work(const struct btf *btf, const struct btf_param *arg) 12376 { 12377 return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_TASK_WORK_ID); 12378 } 12379 12380 static bool is_kfunc_arg_res_spin_lock(const struct btf *btf, const struct btf_param *arg) 12381 { 12382 return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_RES_SPIN_LOCK_ID); 12383 } 12384 12385 static bool is_rbtree_node_type(const struct btf_type *t) 12386 { 12387 return t == btf_type_by_id(btf_vmlinux, kf_arg_btf_ids[KF_ARG_RB_NODE_ID]); 12388 } 12389 12390 static bool is_list_node_type(const struct btf_type *t) 12391 { 12392 return t == btf_type_by_id(btf_vmlinux, kf_arg_btf_ids[KF_ARG_LIST_NODE_ID]); 12393 } 12394 12395 static bool is_kfunc_arg_callback(struct bpf_verifier_env *env, const struct btf *btf, 12396 const struct btf_param *arg) 12397 { 12398 const struct btf_type 
*t; 12399 12400 t = btf_type_resolve_func_ptr(btf, arg->type, NULL); 12401 if (!t) 12402 return false; 12403 12404 return true; 12405 } 12406 12407 static bool is_kfunc_arg_prog_aux(const struct btf *btf, const struct btf_param *arg) 12408 { 12409 return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_PROG_AUX_ID); 12410 } 12411 12412 /* Returns true if struct is composed of scalars, 4 levels of nesting allowed */ 12413 static bool __btf_type_is_scalar_struct(struct bpf_verifier_env *env, 12414 const struct btf *btf, 12415 const struct btf_type *t, int rec) 12416 { 12417 const struct btf_type *member_type; 12418 const struct btf_member *member; 12419 u32 i; 12420 12421 if (!btf_type_is_struct(t)) 12422 return false; 12423 12424 for_each_member(i, t, member) { 12425 const struct btf_array *array; 12426 12427 member_type = btf_type_skip_modifiers(btf, member->type, NULL); 12428 if (btf_type_is_struct(member_type)) { 12429 if (rec >= 3) { 12430 verbose(env, "max struct nesting depth exceeded\n"); 12431 return false; 12432 } 12433 if (!__btf_type_is_scalar_struct(env, btf, member_type, rec + 1)) 12434 return false; 12435 continue; 12436 } 12437 if (btf_type_is_array(member_type)) { 12438 array = btf_array(member_type); 12439 if (!array->nelems) 12440 return false; 12441 member_type = btf_type_skip_modifiers(btf, array->type, NULL); 12442 if (!btf_type_is_scalar(member_type)) 12443 return false; 12444 continue; 12445 } 12446 if (!btf_type_is_scalar(member_type)) 12447 return false; 12448 } 12449 return true; 12450 } 12451 12452 enum kfunc_ptr_arg_type { 12453 KF_ARG_PTR_TO_CTX, 12454 KF_ARG_PTR_TO_ALLOC_BTF_ID, /* Allocated object */ 12455 KF_ARG_PTR_TO_REFCOUNTED_KPTR, /* Refcounted local kptr */ 12456 KF_ARG_PTR_TO_DYNPTR, 12457 KF_ARG_PTR_TO_ITER, 12458 KF_ARG_PTR_TO_LIST_HEAD, 12459 KF_ARG_PTR_TO_LIST_NODE, 12460 KF_ARG_PTR_TO_BTF_ID, /* Also covers reg2btf_ids conversions */ 12461 KF_ARG_PTR_TO_MEM, 12462 KF_ARG_PTR_TO_MEM_SIZE, /* Size derived from next argument, skip it 
*/ 12463 KF_ARG_PTR_TO_CALLBACK, 12464 KF_ARG_PTR_TO_RB_ROOT, 12465 KF_ARG_PTR_TO_RB_NODE, 12466 KF_ARG_PTR_TO_NULL, 12467 KF_ARG_PTR_TO_CONST_STR, 12468 KF_ARG_PTR_TO_MAP, 12469 KF_ARG_PTR_TO_TIMER, 12470 KF_ARG_PTR_TO_WORKQUEUE, 12471 KF_ARG_PTR_TO_IRQ_FLAG, 12472 KF_ARG_PTR_TO_RES_SPIN_LOCK, 12473 KF_ARG_PTR_TO_TASK_WORK, 12474 }; 12475 12476 enum special_kfunc_type { 12477 KF_bpf_obj_new_impl, 12478 KF_bpf_obj_drop_impl, 12479 KF_bpf_refcount_acquire_impl, 12480 KF_bpf_list_push_front_impl, 12481 KF_bpf_list_push_back_impl, 12482 KF_bpf_list_pop_front, 12483 KF_bpf_list_pop_back, 12484 KF_bpf_list_front, 12485 KF_bpf_list_back, 12486 KF_bpf_cast_to_kern_ctx, 12487 KF_bpf_rdonly_cast, 12488 KF_bpf_rcu_read_lock, 12489 KF_bpf_rcu_read_unlock, 12490 KF_bpf_rbtree_remove, 12491 KF_bpf_rbtree_add_impl, 12492 KF_bpf_rbtree_first, 12493 KF_bpf_rbtree_root, 12494 KF_bpf_rbtree_left, 12495 KF_bpf_rbtree_right, 12496 KF_bpf_dynptr_from_skb, 12497 KF_bpf_dynptr_from_xdp, 12498 KF_bpf_dynptr_from_skb_meta, 12499 KF_bpf_xdp_pull_data, 12500 KF_bpf_dynptr_slice, 12501 KF_bpf_dynptr_slice_rdwr, 12502 KF_bpf_dynptr_clone, 12503 KF_bpf_percpu_obj_new_impl, 12504 KF_bpf_percpu_obj_drop_impl, 12505 KF_bpf_throw, 12506 KF_bpf_wq_set_callback, 12507 KF_bpf_preempt_disable, 12508 KF_bpf_preempt_enable, 12509 KF_bpf_iter_css_task_new, 12510 KF_bpf_session_cookie, 12511 KF_bpf_get_kmem_cache, 12512 KF_bpf_local_irq_save, 12513 KF_bpf_local_irq_restore, 12514 KF_bpf_iter_num_new, 12515 KF_bpf_iter_num_next, 12516 KF_bpf_iter_num_destroy, 12517 KF_bpf_set_dentry_xattr, 12518 KF_bpf_remove_dentry_xattr, 12519 KF_bpf_res_spin_lock, 12520 KF_bpf_res_spin_unlock, 12521 KF_bpf_res_spin_lock_irqsave, 12522 KF_bpf_res_spin_unlock_irqrestore, 12523 KF_bpf_dynptr_from_file, 12524 KF_bpf_dynptr_file_discard, 12525 KF___bpf_trap, 12526 KF_bpf_task_work_schedule_signal, 12527 KF_bpf_task_work_schedule_resume, 12528 KF_bpf_arena_alloc_pages, 12529 KF_bpf_arena_free_pages, 12530 
KF_bpf_arena_reserve_pages, 12531 KF_bpf_session_is_return, 12532 KF_bpf_stream_vprintk, 12533 KF_bpf_stream_print_stack, 12534 }; 12535 12536 BTF_ID_LIST(special_kfunc_list) 12537 BTF_ID(func, bpf_obj_new_impl) 12538 BTF_ID(func, bpf_obj_drop_impl) 12539 BTF_ID(func, bpf_refcount_acquire_impl) 12540 BTF_ID(func, bpf_list_push_front_impl) 12541 BTF_ID(func, bpf_list_push_back_impl) 12542 BTF_ID(func, bpf_list_pop_front) 12543 BTF_ID(func, bpf_list_pop_back) 12544 BTF_ID(func, bpf_list_front) 12545 BTF_ID(func, bpf_list_back) 12546 BTF_ID(func, bpf_cast_to_kern_ctx) 12547 BTF_ID(func, bpf_rdonly_cast) 12548 BTF_ID(func, bpf_rcu_read_lock) 12549 BTF_ID(func, bpf_rcu_read_unlock) 12550 BTF_ID(func, bpf_rbtree_remove) 12551 BTF_ID(func, bpf_rbtree_add_impl) 12552 BTF_ID(func, bpf_rbtree_first) 12553 BTF_ID(func, bpf_rbtree_root) 12554 BTF_ID(func, bpf_rbtree_left) 12555 BTF_ID(func, bpf_rbtree_right) 12556 #ifdef CONFIG_NET 12557 BTF_ID(func, bpf_dynptr_from_skb) 12558 BTF_ID(func, bpf_dynptr_from_xdp) 12559 BTF_ID(func, bpf_dynptr_from_skb_meta) 12560 BTF_ID(func, bpf_xdp_pull_data) 12561 #else 12562 BTF_ID_UNUSED 12563 BTF_ID_UNUSED 12564 BTF_ID_UNUSED 12565 BTF_ID_UNUSED 12566 #endif 12567 BTF_ID(func, bpf_dynptr_slice) 12568 BTF_ID(func, bpf_dynptr_slice_rdwr) 12569 BTF_ID(func, bpf_dynptr_clone) 12570 BTF_ID(func, bpf_percpu_obj_new_impl) 12571 BTF_ID(func, bpf_percpu_obj_drop_impl) 12572 BTF_ID(func, bpf_throw) 12573 BTF_ID(func, bpf_wq_set_callback) 12574 BTF_ID(func, bpf_preempt_disable) 12575 BTF_ID(func, bpf_preempt_enable) 12576 #ifdef CONFIG_CGROUPS 12577 BTF_ID(func, bpf_iter_css_task_new) 12578 #else 12579 BTF_ID_UNUSED 12580 #endif 12581 #ifdef CONFIG_BPF_EVENTS 12582 BTF_ID(func, bpf_session_cookie) 12583 #else 12584 BTF_ID_UNUSED 12585 #endif 12586 BTF_ID(func, bpf_get_kmem_cache) 12587 BTF_ID(func, bpf_local_irq_save) 12588 BTF_ID(func, bpf_local_irq_restore) 12589 BTF_ID(func, bpf_iter_num_new) 12590 BTF_ID(func, bpf_iter_num_next) 12591 BTF_ID(func, 
bpf_iter_num_destroy) 12592 #ifdef CONFIG_BPF_LSM 12593 BTF_ID(func, bpf_set_dentry_xattr) 12594 BTF_ID(func, bpf_remove_dentry_xattr) 12595 #else 12596 BTF_ID_UNUSED 12597 BTF_ID_UNUSED 12598 #endif 12599 BTF_ID(func, bpf_res_spin_lock) 12600 BTF_ID(func, bpf_res_spin_unlock) 12601 BTF_ID(func, bpf_res_spin_lock_irqsave) 12602 BTF_ID(func, bpf_res_spin_unlock_irqrestore) 12603 BTF_ID(func, bpf_dynptr_from_file) 12604 BTF_ID(func, bpf_dynptr_file_discard) 12605 BTF_ID(func, __bpf_trap) 12606 BTF_ID(func, bpf_task_work_schedule_signal) 12607 BTF_ID(func, bpf_task_work_schedule_resume) 12608 BTF_ID(func, bpf_arena_alloc_pages) 12609 BTF_ID(func, bpf_arena_free_pages) 12610 BTF_ID(func, bpf_arena_reserve_pages) 12611 BTF_ID(func, bpf_session_is_return) 12612 BTF_ID(func, bpf_stream_vprintk) 12613 BTF_ID(func, bpf_stream_print_stack) 12614 12615 static bool is_task_work_add_kfunc(u32 func_id) 12616 { 12617 return func_id == special_kfunc_list[KF_bpf_task_work_schedule_signal] || 12618 func_id == special_kfunc_list[KF_bpf_task_work_schedule_resume]; 12619 } 12620 12621 static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta) 12622 { 12623 if (meta->func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl] && 12624 meta->arg_owning_ref) { 12625 return false; 12626 } 12627 12628 return meta->kfunc_flags & KF_RET_NULL; 12629 } 12630 12631 static bool is_kfunc_bpf_rcu_read_lock(struct bpf_kfunc_call_arg_meta *meta) 12632 { 12633 return meta->func_id == special_kfunc_list[KF_bpf_rcu_read_lock]; 12634 } 12635 12636 static bool is_kfunc_bpf_rcu_read_unlock(struct bpf_kfunc_call_arg_meta *meta) 12637 { 12638 return meta->func_id == special_kfunc_list[KF_bpf_rcu_read_unlock]; 12639 } 12640 12641 static bool is_kfunc_bpf_preempt_disable(struct bpf_kfunc_call_arg_meta *meta) 12642 { 12643 return meta->func_id == special_kfunc_list[KF_bpf_preempt_disable]; 12644 } 12645 12646 static bool is_kfunc_bpf_preempt_enable(struct bpf_kfunc_call_arg_meta *meta) 12647 { 12648 
return meta->func_id == special_kfunc_list[KF_bpf_preempt_enable]; 12649 } 12650 12651 static bool is_kfunc_pkt_changing(struct bpf_kfunc_call_arg_meta *meta) 12652 { 12653 return meta->func_id == special_kfunc_list[KF_bpf_xdp_pull_data]; 12654 } 12655 12656 static enum kfunc_ptr_arg_type 12657 get_kfunc_ptr_arg_type(struct bpf_verifier_env *env, 12658 struct bpf_kfunc_call_arg_meta *meta, 12659 const struct btf_type *t, const struct btf_type *ref_t, 12660 const char *ref_tname, const struct btf_param *args, 12661 int argno, int nargs) 12662 { 12663 u32 regno = argno + 1; 12664 struct bpf_reg_state *regs = cur_regs(env); 12665 struct bpf_reg_state *reg = ®s[regno]; 12666 bool arg_mem_size = false; 12667 12668 if (meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx] || 12669 meta->func_id == special_kfunc_list[KF_bpf_session_is_return] || 12670 meta->func_id == special_kfunc_list[KF_bpf_session_cookie]) 12671 return KF_ARG_PTR_TO_CTX; 12672 12673 if (argno + 1 < nargs && 12674 (is_kfunc_arg_mem_size(meta->btf, &args[argno + 1], ®s[regno + 1]) || 12675 is_kfunc_arg_const_mem_size(meta->btf, &args[argno + 1], ®s[regno + 1]))) 12676 arg_mem_size = true; 12677 12678 /* In this function, we verify the kfunc's BTF as per the argument type, 12679 * leaving the rest of the verification with respect to the register 12680 * type to our caller. When a set of conditions hold in the BTF type of 12681 * arguments, we resolve it to a known kfunc_ptr_arg_type. 
12682 */ 12683 if (btf_is_prog_ctx_type(&env->log, meta->btf, t, resolve_prog_type(env->prog), argno)) 12684 return KF_ARG_PTR_TO_CTX; 12685 12686 if (is_kfunc_arg_nullable(meta->btf, &args[argno]) && register_is_null(reg) && 12687 !arg_mem_size) 12688 return KF_ARG_PTR_TO_NULL; 12689 12690 if (is_kfunc_arg_alloc_obj(meta->btf, &args[argno])) 12691 return KF_ARG_PTR_TO_ALLOC_BTF_ID; 12692 12693 if (is_kfunc_arg_refcounted_kptr(meta->btf, &args[argno])) 12694 return KF_ARG_PTR_TO_REFCOUNTED_KPTR; 12695 12696 if (is_kfunc_arg_dynptr(meta->btf, &args[argno])) 12697 return KF_ARG_PTR_TO_DYNPTR; 12698 12699 if (is_kfunc_arg_iter(meta, argno, &args[argno])) 12700 return KF_ARG_PTR_TO_ITER; 12701 12702 if (is_kfunc_arg_list_head(meta->btf, &args[argno])) 12703 return KF_ARG_PTR_TO_LIST_HEAD; 12704 12705 if (is_kfunc_arg_list_node(meta->btf, &args[argno])) 12706 return KF_ARG_PTR_TO_LIST_NODE; 12707 12708 if (is_kfunc_arg_rbtree_root(meta->btf, &args[argno])) 12709 return KF_ARG_PTR_TO_RB_ROOT; 12710 12711 if (is_kfunc_arg_rbtree_node(meta->btf, &args[argno])) 12712 return KF_ARG_PTR_TO_RB_NODE; 12713 12714 if (is_kfunc_arg_const_str(meta->btf, &args[argno])) 12715 return KF_ARG_PTR_TO_CONST_STR; 12716 12717 if (is_kfunc_arg_map(meta->btf, &args[argno])) 12718 return KF_ARG_PTR_TO_MAP; 12719 12720 if (is_kfunc_arg_wq(meta->btf, &args[argno])) 12721 return KF_ARG_PTR_TO_WORKQUEUE; 12722 12723 if (is_kfunc_arg_timer(meta->btf, &args[argno])) 12724 return KF_ARG_PTR_TO_TIMER; 12725 12726 if (is_kfunc_arg_task_work(meta->btf, &args[argno])) 12727 return KF_ARG_PTR_TO_TASK_WORK; 12728 12729 if (is_kfunc_arg_irq_flag(meta->btf, &args[argno])) 12730 return KF_ARG_PTR_TO_IRQ_FLAG; 12731 12732 if (is_kfunc_arg_res_spin_lock(meta->btf, &args[argno])) 12733 return KF_ARG_PTR_TO_RES_SPIN_LOCK; 12734 12735 if ((base_type(reg->type) == PTR_TO_BTF_ID || reg2btf_ids[base_type(reg->type)])) { 12736 if (!btf_type_is_struct(ref_t)) { 12737 verbose(env, "kernel function %s args#%d pointer 
type %s %s is not supported\n", 12738 meta->func_name, argno, btf_type_str(ref_t), ref_tname); 12739 return -EINVAL; 12740 } 12741 return KF_ARG_PTR_TO_BTF_ID; 12742 } 12743 12744 if (is_kfunc_arg_callback(env, meta->btf, &args[argno])) 12745 return KF_ARG_PTR_TO_CALLBACK; 12746 12747 /* This is the catch all argument type of register types supported by 12748 * check_helper_mem_access. However, we only allow when argument type is 12749 * pointer to scalar, or struct composed (recursively) of scalars. When 12750 * arg_mem_size is true, the pointer can be void *. 12751 */ 12752 if (!btf_type_is_scalar(ref_t) && !__btf_type_is_scalar_struct(env, meta->btf, ref_t, 0) && 12753 (arg_mem_size ? !btf_type_is_void(ref_t) : 1)) { 12754 verbose(env, "arg#%d pointer type %s %s must point to %sscalar, or struct with scalar\n", 12755 argno, btf_type_str(ref_t), ref_tname, arg_mem_size ? "void, " : ""); 12756 return -EINVAL; 12757 } 12758 return arg_mem_size ? KF_ARG_PTR_TO_MEM_SIZE : KF_ARG_PTR_TO_MEM; 12759 } 12760 12761 static int process_kf_arg_ptr_to_btf_id(struct bpf_verifier_env *env, 12762 struct bpf_reg_state *reg, 12763 const struct btf_type *ref_t, 12764 const char *ref_tname, u32 ref_id, 12765 struct bpf_kfunc_call_arg_meta *meta, 12766 int argno) 12767 { 12768 const struct btf_type *reg_ref_t; 12769 bool strict_type_match = false; 12770 const struct btf *reg_btf; 12771 const char *reg_ref_tname; 12772 bool taking_projection; 12773 bool struct_same; 12774 u32 reg_ref_id; 12775 12776 if (base_type(reg->type) == PTR_TO_BTF_ID) { 12777 reg_btf = reg->btf; 12778 reg_ref_id = reg->btf_id; 12779 } else { 12780 reg_btf = btf_vmlinux; 12781 reg_ref_id = *reg2btf_ids[base_type(reg->type)]; 12782 } 12783 12784 /* Enforce strict type matching for calls to kfuncs that are acquiring 12785 * or releasing a reference, or are no-cast aliases. 
We do _not_ 12786 * enforce strict matching for kfuncs by default, 12787 * as we want to enable BPF programs to pass types that are bitwise 12788 * equivalent without forcing them to explicitly cast with something 12789 * like bpf_cast_to_kern_ctx(). 12790 * 12791 * For example, say we had a type like the following: 12792 * 12793 * struct bpf_cpumask { 12794 * cpumask_t cpumask; 12795 * refcount_t usage; 12796 * }; 12797 * 12798 * Note that as specified in <linux/cpumask.h>, cpumask_t is typedef'ed 12799 * to a struct cpumask, so it would be safe to pass a struct 12800 * bpf_cpumask * to a kfunc expecting a struct cpumask *. 12801 * 12802 * The philosophy here is similar to how we allow scalars of different 12803 * types to be passed to kfuncs as long as the size is the same. The 12804 * only difference here is that we're simply allowing 12805 * btf_struct_ids_match() to walk the struct at the 0th offset, and 12806 * resolve types. 12807 */ 12808 if ((is_kfunc_release(meta) && reg->ref_obj_id) || 12809 btf_type_ids_nocast_alias(&env->log, reg_btf, reg_ref_id, meta->btf, ref_id)) 12810 strict_type_match = true; 12811 12812 WARN_ON_ONCE(is_kfunc_release(meta) && 12813 (reg->off || !tnum_is_const(reg->var_off) || 12814 reg->var_off.value)); 12815 12816 reg_ref_t = btf_type_skip_modifiers(reg_btf, reg_ref_id, ®_ref_id); 12817 reg_ref_tname = btf_name_by_offset(reg_btf, reg_ref_t->name_off); 12818 struct_same = btf_struct_ids_match(&env->log, reg_btf, reg_ref_id, reg->off, meta->btf, ref_id, strict_type_match); 12819 /* If kfunc is accepting a projection type (ie. __sk_buff), it cannot 12820 * actually use it -- it must cast to the underlying type. So we allow 12821 * caller to pass in the underlying type. 
12822 */ 12823 taking_projection = btf_is_projection_of(ref_tname, reg_ref_tname); 12824 if (!taking_projection && !struct_same) { 12825 verbose(env, "kernel function %s args#%d expected pointer to %s %s but R%d has a pointer to %s %s\n", 12826 meta->func_name, argno, btf_type_str(ref_t), ref_tname, argno + 1, 12827 btf_type_str(reg_ref_t), reg_ref_tname); 12828 return -EINVAL; 12829 } 12830 return 0; 12831 } 12832 12833 static int process_irq_flag(struct bpf_verifier_env *env, int regno, 12834 struct bpf_kfunc_call_arg_meta *meta) 12835 { 12836 struct bpf_reg_state *reg = reg_state(env, regno); 12837 int err, kfunc_class = IRQ_NATIVE_KFUNC; 12838 bool irq_save; 12839 12840 if (meta->func_id == special_kfunc_list[KF_bpf_local_irq_save] || 12841 meta->func_id == special_kfunc_list[KF_bpf_res_spin_lock_irqsave]) { 12842 irq_save = true; 12843 if (meta->func_id == special_kfunc_list[KF_bpf_res_spin_lock_irqsave]) 12844 kfunc_class = IRQ_LOCK_KFUNC; 12845 } else if (meta->func_id == special_kfunc_list[KF_bpf_local_irq_restore] || 12846 meta->func_id == special_kfunc_list[KF_bpf_res_spin_unlock_irqrestore]) { 12847 irq_save = false; 12848 if (meta->func_id == special_kfunc_list[KF_bpf_res_spin_unlock_irqrestore]) 12849 kfunc_class = IRQ_LOCK_KFUNC; 12850 } else { 12851 verifier_bug(env, "unknown irq flags kfunc"); 12852 return -EFAULT; 12853 } 12854 12855 if (irq_save) { 12856 if (!is_irq_flag_reg_valid_uninit(env, reg)) { 12857 verbose(env, "expected uninitialized irq flag as arg#%d\n", regno - 1); 12858 return -EINVAL; 12859 } 12860 12861 err = check_mem_access(env, env->insn_idx, regno, 0, BPF_DW, BPF_WRITE, -1, false, false); 12862 if (err) 12863 return err; 12864 12865 err = mark_stack_slot_irq_flag(env, meta, reg, env->insn_idx, kfunc_class); 12866 if (err) 12867 return err; 12868 } else { 12869 err = is_irq_flag_reg_valid_init(env, reg); 12870 if (err) { 12871 verbose(env, "expected an initialized irq flag as arg#%d\n", regno - 1); 12872 return err; 12873 } 12874 
12875 err = mark_irq_flag_read(env, reg); 12876 if (err) 12877 return err; 12878 12879 err = unmark_stack_slot_irq_flag(env, reg, kfunc_class); 12880 if (err) 12881 return err; 12882 } 12883 return 0; 12884 } 12885 12886 12887 static int ref_set_non_owning(struct bpf_verifier_env *env, struct bpf_reg_state *reg) 12888 { 12889 struct btf_record *rec = reg_btf_record(reg); 12890 12891 if (!env->cur_state->active_locks) { 12892 verifier_bug(env, "%s w/o active lock", __func__); 12893 return -EFAULT; 12894 } 12895 12896 if (type_flag(reg->type) & NON_OWN_REF) { 12897 verifier_bug(env, "NON_OWN_REF already set"); 12898 return -EFAULT; 12899 } 12900 12901 reg->type |= NON_OWN_REF; 12902 if (rec->refcount_off >= 0) 12903 reg->type |= MEM_RCU; 12904 12905 return 0; 12906 } 12907 12908 static int ref_convert_owning_non_owning(struct bpf_verifier_env *env, u32 ref_obj_id) 12909 { 12910 struct bpf_verifier_state *state = env->cur_state; 12911 struct bpf_func_state *unused; 12912 struct bpf_reg_state *reg; 12913 int i; 12914 12915 if (!ref_obj_id) { 12916 verifier_bug(env, "ref_obj_id is zero for owning -> non-owning conversion"); 12917 return -EFAULT; 12918 } 12919 12920 for (i = 0; i < state->acquired_refs; i++) { 12921 if (state->refs[i].id != ref_obj_id) 12922 continue; 12923 12924 /* Clear ref_obj_id here so release_reference doesn't clobber 12925 * the whole reg 12926 */ 12927 bpf_for_each_reg_in_vstate(env->cur_state, unused, reg, ({ 12928 if (reg->ref_obj_id == ref_obj_id) { 12929 reg->ref_obj_id = 0; 12930 ref_set_non_owning(env, reg); 12931 } 12932 })); 12933 return 0; 12934 } 12935 12936 verifier_bug(env, "ref state missing for ref_obj_id"); 12937 return -EFAULT; 12938 } 12939 12940 /* Implementation details: 12941 * 12942 * Each register points to some region of memory, which we define as an 12943 * allocation. Each allocation may embed a bpf_spin_lock which protects any 12944 * special BPF objects (bpf_list_head, bpf_rb_root, etc.) 
part of the same 12945 * allocation. The lock and the data it protects are colocated in the same 12946 * memory region. 12947 * 12948 * Hence, everytime a register holds a pointer value pointing to such 12949 * allocation, the verifier preserves a unique reg->id for it. 12950 * 12951 * The verifier remembers the lock 'ptr' and the lock 'id' whenever 12952 * bpf_spin_lock is called. 12953 * 12954 * To enable this, lock state in the verifier captures two values: 12955 * active_lock.ptr = Register's type specific pointer 12956 * active_lock.id = A unique ID for each register pointer value 12957 * 12958 * Currently, PTR_TO_MAP_VALUE and PTR_TO_BTF_ID | MEM_ALLOC are the two 12959 * supported register types. 12960 * 12961 * The active_lock.ptr in case of map values is the reg->map_ptr, and in case of 12962 * allocated objects is the reg->btf pointer. 12963 * 12964 * The active_lock.id is non-unique for maps supporting direct_value_addr, as we 12965 * can establish the provenance of the map value statically for each distinct 12966 * lookup into such maps. They always contain a single map value hence unique 12967 * IDs for each pseudo load pessimizes the algorithm and rejects valid programs. 12968 * 12969 * So, in case of global variables, they use array maps with max_entries = 1, 12970 * hence their active_lock.ptr becomes map_ptr and id = 0 (since they all point 12971 * into the same map value as max_entries is 1, as described above). 12972 * 12973 * In case of inner map lookups, the inner map pointer has same map_ptr as the 12974 * outer map pointer (in verifier context), but each lookup into an inner map 12975 * assigns a fresh reg->id to the lookup, so while lookups into distinct inner 12976 * maps from the same outer map share the same map_ptr as active_lock.ptr, they 12977 * will get different reg->id assigned to each lookup, hence different 12978 * active_lock.id. 
12979 * 12980 * In case of allocated objects, active_lock.ptr is the reg->btf, and the 12981 * reg->id is a unique ID preserved after the NULL pointer check on the pointer 12982 * returned from bpf_obj_new. Each allocation receives a new reg->id. 12983 */ 12984 static int check_reg_allocation_locked(struct bpf_verifier_env *env, struct bpf_reg_state *reg) 12985 { 12986 struct bpf_reference_state *s; 12987 void *ptr; 12988 u32 id; 12989 12990 switch ((int)reg->type) { 12991 case PTR_TO_MAP_VALUE: 12992 ptr = reg->map_ptr; 12993 break; 12994 case PTR_TO_BTF_ID | MEM_ALLOC: 12995 ptr = reg->btf; 12996 break; 12997 default: 12998 verifier_bug(env, "unknown reg type for lock check"); 12999 return -EFAULT; 13000 } 13001 id = reg->id; 13002 13003 if (!env->cur_state->active_locks) 13004 return -EINVAL; 13005 s = find_lock_state(env->cur_state, REF_TYPE_LOCK_MASK, id, ptr); 13006 if (!s) { 13007 verbose(env, "held lock and object are not in the same allocation\n"); 13008 return -EINVAL; 13009 } 13010 return 0; 13011 } 13012 13013 static bool is_bpf_list_api_kfunc(u32 btf_id) 13014 { 13015 return btf_id == special_kfunc_list[KF_bpf_list_push_front_impl] || 13016 btf_id == special_kfunc_list[KF_bpf_list_push_back_impl] || 13017 btf_id == special_kfunc_list[KF_bpf_list_pop_front] || 13018 btf_id == special_kfunc_list[KF_bpf_list_pop_back] || 13019 btf_id == special_kfunc_list[KF_bpf_list_front] || 13020 btf_id == special_kfunc_list[KF_bpf_list_back]; 13021 } 13022 13023 static bool is_bpf_rbtree_api_kfunc(u32 btf_id) 13024 { 13025 return btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl] || 13026 btf_id == special_kfunc_list[KF_bpf_rbtree_remove] || 13027 btf_id == special_kfunc_list[KF_bpf_rbtree_first] || 13028 btf_id == special_kfunc_list[KF_bpf_rbtree_root] || 13029 btf_id == special_kfunc_list[KF_bpf_rbtree_left] || 13030 btf_id == special_kfunc_list[KF_bpf_rbtree_right]; 13031 } 13032 13033 static bool is_bpf_iter_num_api_kfunc(u32 btf_id) 13034 { 13035 return btf_id 
== special_kfunc_list[KF_bpf_iter_num_new] || 13036 btf_id == special_kfunc_list[KF_bpf_iter_num_next] || 13037 btf_id == special_kfunc_list[KF_bpf_iter_num_destroy]; 13038 } 13039 13040 static bool is_bpf_graph_api_kfunc(u32 btf_id) 13041 { 13042 return is_bpf_list_api_kfunc(btf_id) || is_bpf_rbtree_api_kfunc(btf_id) || 13043 btf_id == special_kfunc_list[KF_bpf_refcount_acquire_impl]; 13044 } 13045 13046 static bool is_bpf_res_spin_lock_kfunc(u32 btf_id) 13047 { 13048 return btf_id == special_kfunc_list[KF_bpf_res_spin_lock] || 13049 btf_id == special_kfunc_list[KF_bpf_res_spin_unlock] || 13050 btf_id == special_kfunc_list[KF_bpf_res_spin_lock_irqsave] || 13051 btf_id == special_kfunc_list[KF_bpf_res_spin_unlock_irqrestore]; 13052 } 13053 13054 static bool is_bpf_arena_kfunc(u32 btf_id) 13055 { 13056 return btf_id == special_kfunc_list[KF_bpf_arena_alloc_pages] || 13057 btf_id == special_kfunc_list[KF_bpf_arena_free_pages] || 13058 btf_id == special_kfunc_list[KF_bpf_arena_reserve_pages]; 13059 } 13060 13061 static bool is_bpf_stream_kfunc(u32 btf_id) 13062 { 13063 return btf_id == special_kfunc_list[KF_bpf_stream_vprintk] || 13064 btf_id == special_kfunc_list[KF_bpf_stream_print_stack]; 13065 } 13066 13067 static bool kfunc_spin_allowed(u32 btf_id) 13068 { 13069 return is_bpf_graph_api_kfunc(btf_id) || is_bpf_iter_num_api_kfunc(btf_id) || 13070 is_bpf_res_spin_lock_kfunc(btf_id) || is_bpf_arena_kfunc(btf_id) || 13071 is_bpf_stream_kfunc(btf_id); 13072 } 13073 13074 static bool is_sync_callback_calling_kfunc(u32 btf_id) 13075 { 13076 return btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl]; 13077 } 13078 13079 static bool is_async_callback_calling_kfunc(u32 btf_id) 13080 { 13081 return is_bpf_wq_set_callback_kfunc(btf_id) || 13082 is_task_work_add_kfunc(btf_id); 13083 } 13084 13085 static bool is_bpf_throw_kfunc(struct bpf_insn *insn) 13086 { 13087 return bpf_pseudo_kfunc_call(insn) && insn->off == 0 && 13088 insn->imm == special_kfunc_list[KF_bpf_throw]; 13089 
} 13090 13091 static bool is_bpf_wq_set_callback_kfunc(u32 btf_id) 13092 { 13093 return btf_id == special_kfunc_list[KF_bpf_wq_set_callback]; 13094 } 13095 13096 static bool is_callback_calling_kfunc(u32 btf_id) 13097 { 13098 return is_sync_callback_calling_kfunc(btf_id) || 13099 is_async_callback_calling_kfunc(btf_id); 13100 } 13101 13102 static bool is_rbtree_lock_required_kfunc(u32 btf_id) 13103 { 13104 return is_bpf_rbtree_api_kfunc(btf_id); 13105 } 13106 13107 static bool check_kfunc_is_graph_root_api(struct bpf_verifier_env *env, 13108 enum btf_field_type head_field_type, 13109 u32 kfunc_btf_id) 13110 { 13111 bool ret; 13112 13113 switch (head_field_type) { 13114 case BPF_LIST_HEAD: 13115 ret = is_bpf_list_api_kfunc(kfunc_btf_id); 13116 break; 13117 case BPF_RB_ROOT: 13118 ret = is_bpf_rbtree_api_kfunc(kfunc_btf_id); 13119 break; 13120 default: 13121 verbose(env, "verifier internal error: unexpected graph root argument type %s\n", 13122 btf_field_type_name(head_field_type)); 13123 return false; 13124 } 13125 13126 if (!ret) 13127 verbose(env, "verifier internal error: %s head arg for unknown kfunc\n", 13128 btf_field_type_name(head_field_type)); 13129 return ret; 13130 } 13131 13132 static bool check_kfunc_is_graph_node_api(struct bpf_verifier_env *env, 13133 enum btf_field_type node_field_type, 13134 u32 kfunc_btf_id) 13135 { 13136 bool ret; 13137 13138 switch (node_field_type) { 13139 case BPF_LIST_NODE: 13140 ret = (kfunc_btf_id == special_kfunc_list[KF_bpf_list_push_front_impl] || 13141 kfunc_btf_id == special_kfunc_list[KF_bpf_list_push_back_impl]); 13142 break; 13143 case BPF_RB_NODE: 13144 ret = (kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_remove] || 13145 kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl] || 13146 kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_left] || 13147 kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_right]); 13148 break; 13149 default: 13150 verbose(env, "verifier internal error: unexpected graph node argument 
type %s\n", 13151 btf_field_type_name(node_field_type)); 13152 return false; 13153 } 13154 13155 if (!ret) 13156 verbose(env, "verifier internal error: %s node arg for unknown kfunc\n", 13157 btf_field_type_name(node_field_type)); 13158 return ret; 13159 } 13160 13161 static int 13162 __process_kf_arg_ptr_to_graph_root(struct bpf_verifier_env *env, 13163 struct bpf_reg_state *reg, u32 regno, 13164 struct bpf_kfunc_call_arg_meta *meta, 13165 enum btf_field_type head_field_type, 13166 struct btf_field **head_field) 13167 { 13168 const char *head_type_name; 13169 struct btf_field *field; 13170 struct btf_record *rec; 13171 u32 head_off; 13172 13173 if (meta->btf != btf_vmlinux) { 13174 verifier_bug(env, "unexpected btf mismatch in kfunc call"); 13175 return -EFAULT; 13176 } 13177 13178 if (!check_kfunc_is_graph_root_api(env, head_field_type, meta->func_id)) 13179 return -EFAULT; 13180 13181 head_type_name = btf_field_type_name(head_field_type); 13182 if (!tnum_is_const(reg->var_off)) { 13183 verbose(env, 13184 "R%d doesn't have constant offset. 
%s has to be at the constant offset\n", 13185 regno, head_type_name); 13186 return -EINVAL; 13187 } 13188 13189 rec = reg_btf_record(reg); 13190 head_off = reg->off + reg->var_off.value; 13191 field = btf_record_find(rec, head_off, head_field_type); 13192 if (!field) { 13193 verbose(env, "%s not found at offset=%u\n", head_type_name, head_off); 13194 return -EINVAL; 13195 } 13196 13197 /* All functions require bpf_list_head to be protected using a bpf_spin_lock */ 13198 if (check_reg_allocation_locked(env, reg)) { 13199 verbose(env, "bpf_spin_lock at off=%d must be held for %s\n", 13200 rec->spin_lock_off, head_type_name); 13201 return -EINVAL; 13202 } 13203 13204 if (*head_field) { 13205 verifier_bug(env, "repeating %s arg", head_type_name); 13206 return -EFAULT; 13207 } 13208 *head_field = field; 13209 return 0; 13210 } 13211 13212 static int process_kf_arg_ptr_to_list_head(struct bpf_verifier_env *env, 13213 struct bpf_reg_state *reg, u32 regno, 13214 struct bpf_kfunc_call_arg_meta *meta) 13215 { 13216 return __process_kf_arg_ptr_to_graph_root(env, reg, regno, meta, BPF_LIST_HEAD, 13217 &meta->arg_list_head.field); 13218 } 13219 13220 static int process_kf_arg_ptr_to_rbtree_root(struct bpf_verifier_env *env, 13221 struct bpf_reg_state *reg, u32 regno, 13222 struct bpf_kfunc_call_arg_meta *meta) 13223 { 13224 return __process_kf_arg_ptr_to_graph_root(env, reg, regno, meta, BPF_RB_ROOT, 13225 &meta->arg_rbtree_root.field); 13226 } 13227 13228 static int 13229 __process_kf_arg_ptr_to_graph_node(struct bpf_verifier_env *env, 13230 struct bpf_reg_state *reg, u32 regno, 13231 struct bpf_kfunc_call_arg_meta *meta, 13232 enum btf_field_type head_field_type, 13233 enum btf_field_type node_field_type, 13234 struct btf_field **node_field) 13235 { 13236 const char *node_type_name; 13237 const struct btf_type *et, *t; 13238 struct btf_field *field; 13239 u32 node_off; 13240 13241 if (meta->btf != btf_vmlinux) { 13242 verifier_bug(env, "unexpected btf mismatch in kfunc 
call"); 13243 return -EFAULT; 13244 } 13245 13246 if (!check_kfunc_is_graph_node_api(env, node_field_type, meta->func_id)) 13247 return -EFAULT; 13248 13249 node_type_name = btf_field_type_name(node_field_type); 13250 if (!tnum_is_const(reg->var_off)) { 13251 verbose(env, 13252 "R%d doesn't have constant offset. %s has to be at the constant offset\n", 13253 regno, node_type_name); 13254 return -EINVAL; 13255 } 13256 13257 node_off = reg->off + reg->var_off.value; 13258 field = reg_find_field_offset(reg, node_off, node_field_type); 13259 if (!field) { 13260 verbose(env, "%s not found at offset=%u\n", node_type_name, node_off); 13261 return -EINVAL; 13262 } 13263 13264 field = *node_field; 13265 13266 et = btf_type_by_id(field->graph_root.btf, field->graph_root.value_btf_id); 13267 t = btf_type_by_id(reg->btf, reg->btf_id); 13268 if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, 0, field->graph_root.btf, 13269 field->graph_root.value_btf_id, true)) { 13270 verbose(env, "operation on %s expects arg#1 %s at offset=%d " 13271 "in struct %s, but arg is at offset=%d in struct %s\n", 13272 btf_field_type_name(head_field_type), 13273 btf_field_type_name(node_field_type), 13274 field->graph_root.node_offset, 13275 btf_name_by_offset(field->graph_root.btf, et->name_off), 13276 node_off, btf_name_by_offset(reg->btf, t->name_off)); 13277 return -EINVAL; 13278 } 13279 meta->arg_btf = reg->btf; 13280 meta->arg_btf_id = reg->btf_id; 13281 13282 if (node_off != field->graph_root.node_offset) { 13283 verbose(env, "arg#1 offset=%d, but expected %s at offset=%d in struct %s\n", 13284 node_off, btf_field_type_name(node_field_type), 13285 field->graph_root.node_offset, 13286 btf_name_by_offset(field->graph_root.btf, et->name_off)); 13287 return -EINVAL; 13288 } 13289 13290 return 0; 13291 } 13292 13293 static int process_kf_arg_ptr_to_list_node(struct bpf_verifier_env *env, 13294 struct bpf_reg_state *reg, u32 regno, 13295 struct bpf_kfunc_call_arg_meta *meta) 13296 { 13297 
return __process_kf_arg_ptr_to_graph_node(env, reg, regno, meta, 13298 BPF_LIST_HEAD, BPF_LIST_NODE, 13299 &meta->arg_list_head.field); 13300 } 13301 13302 static int process_kf_arg_ptr_to_rbtree_node(struct bpf_verifier_env *env, 13303 struct bpf_reg_state *reg, u32 regno, 13304 struct bpf_kfunc_call_arg_meta *meta) 13305 { 13306 return __process_kf_arg_ptr_to_graph_node(env, reg, regno, meta, 13307 BPF_RB_ROOT, BPF_RB_NODE, 13308 &meta->arg_rbtree_root.field); 13309 } 13310 13311 /* 13312 * css_task iter allowlist is needed to avoid dead locking on css_set_lock. 13313 * LSM hooks and iters (both sleepable and non-sleepable) are safe. 13314 * Any sleepable progs are also safe since bpf_check_attach_target() enforce 13315 * them can only be attached to some specific hook points. 13316 */ 13317 static bool check_css_task_iter_allowlist(struct bpf_verifier_env *env) 13318 { 13319 enum bpf_prog_type prog_type = resolve_prog_type(env->prog); 13320 13321 switch (prog_type) { 13322 case BPF_PROG_TYPE_LSM: 13323 return true; 13324 case BPF_PROG_TYPE_TRACING: 13325 if (env->prog->expected_attach_type == BPF_TRACE_ITER) 13326 return true; 13327 fallthrough; 13328 default: 13329 return in_sleepable(env); 13330 } 13331 } 13332 13333 static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_arg_meta *meta, 13334 int insn_idx) 13335 { 13336 const char *func_name = meta->func_name, *ref_tname; 13337 const struct btf *btf = meta->btf; 13338 const struct btf_param *args; 13339 struct btf_record *rec; 13340 u32 i, nargs; 13341 int ret; 13342 13343 args = (const struct btf_param *)(meta->func_proto + 1); 13344 nargs = btf_type_vlen(meta->func_proto); 13345 if (nargs > MAX_BPF_FUNC_REG_ARGS) { 13346 verbose(env, "Function %s has %d > %d args\n", func_name, nargs, 13347 MAX_BPF_FUNC_REG_ARGS); 13348 return -EINVAL; 13349 } 13350 13351 /* Check that BTF function arguments match actual types that the 13352 * verifier sees. 
13353 */ 13354 for (i = 0; i < nargs; i++) { 13355 struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[i + 1]; 13356 const struct btf_type *t, *ref_t, *resolve_ret; 13357 enum bpf_arg_type arg_type = ARG_DONTCARE; 13358 u32 regno = i + 1, ref_id, type_size; 13359 bool is_ret_buf_sz = false; 13360 int kf_arg_type; 13361 13362 t = btf_type_skip_modifiers(btf, args[i].type, NULL); 13363 13364 if (is_kfunc_arg_ignore(btf, &args[i])) 13365 continue; 13366 13367 if (is_kfunc_arg_prog_aux(btf, &args[i])) { 13368 /* Reject repeated use bpf_prog_aux */ 13369 if (meta->arg_prog) { 13370 verifier_bug(env, "Only 1 prog->aux argument supported per-kfunc"); 13371 return -EFAULT; 13372 } 13373 meta->arg_prog = true; 13374 cur_aux(env)->arg_prog = regno; 13375 continue; 13376 } 13377 13378 if (btf_type_is_scalar(t)) { 13379 if (reg->type != SCALAR_VALUE) { 13380 verbose(env, "R%d is not a scalar\n", regno); 13381 return -EINVAL; 13382 } 13383 13384 if (is_kfunc_arg_constant(meta->btf, &args[i])) { 13385 if (meta->arg_constant.found) { 13386 verifier_bug(env, "only one constant argument permitted"); 13387 return -EFAULT; 13388 } 13389 if (!tnum_is_const(reg->var_off)) { 13390 verbose(env, "R%d must be a known constant\n", regno); 13391 return -EINVAL; 13392 } 13393 ret = mark_chain_precision(env, regno); 13394 if (ret < 0) 13395 return ret; 13396 meta->arg_constant.found = true; 13397 meta->arg_constant.value = reg->var_off.value; 13398 } else if (is_kfunc_arg_scalar_with_name(btf, &args[i], "rdonly_buf_size")) { 13399 meta->r0_rdonly = true; 13400 is_ret_buf_sz = true; 13401 } else if (is_kfunc_arg_scalar_with_name(btf, &args[i], "rdwr_buf_size")) { 13402 is_ret_buf_sz = true; 13403 } 13404 13405 if (is_ret_buf_sz) { 13406 if (meta->r0_size) { 13407 verbose(env, "2 or more rdonly/rdwr_buf_size parameters for kfunc"); 13408 return -EINVAL; 13409 } 13410 13411 if (!tnum_is_const(reg->var_off)) { 13412 verbose(env, "R%d is not a const\n", regno); 13413 return -EINVAL; 13414 } 13415 
13416 meta->r0_size = reg->var_off.value; 13417 ret = mark_chain_precision(env, regno); 13418 if (ret) 13419 return ret; 13420 } 13421 continue; 13422 } 13423 13424 if (!btf_type_is_ptr(t)) { 13425 verbose(env, "Unrecognized arg#%d type %s\n", i, btf_type_str(t)); 13426 return -EINVAL; 13427 } 13428 13429 if ((register_is_null(reg) || type_may_be_null(reg->type)) && 13430 !is_kfunc_arg_nullable(meta->btf, &args[i])) { 13431 verbose(env, "Possibly NULL pointer passed to trusted arg%d\n", i); 13432 return -EACCES; 13433 } 13434 13435 if (reg->ref_obj_id) { 13436 if (is_kfunc_release(meta) && meta->ref_obj_id) { 13437 verifier_bug(env, "more than one arg with ref_obj_id R%d %u %u", 13438 regno, reg->ref_obj_id, 13439 meta->ref_obj_id); 13440 return -EFAULT; 13441 } 13442 meta->ref_obj_id = reg->ref_obj_id; 13443 if (is_kfunc_release(meta)) 13444 meta->release_regno = regno; 13445 } 13446 13447 ref_t = btf_type_skip_modifiers(btf, t->type, &ref_id); 13448 ref_tname = btf_name_by_offset(btf, ref_t->name_off); 13449 13450 kf_arg_type = get_kfunc_ptr_arg_type(env, meta, t, ref_t, ref_tname, args, i, nargs); 13451 if (kf_arg_type < 0) 13452 return kf_arg_type; 13453 13454 switch (kf_arg_type) { 13455 case KF_ARG_PTR_TO_NULL: 13456 continue; 13457 case KF_ARG_PTR_TO_MAP: 13458 if (!reg->map_ptr) { 13459 verbose(env, "pointer in R%d isn't map pointer\n", regno); 13460 return -EINVAL; 13461 } 13462 if (meta->map.ptr && (reg->map_ptr->record->wq_off >= 0 || 13463 reg->map_ptr->record->task_work_off >= 0)) { 13464 /* Use map_uid (which is unique id of inner map) to reject: 13465 * inner_map1 = bpf_map_lookup_elem(outer_map, key1) 13466 * inner_map2 = bpf_map_lookup_elem(outer_map, key2) 13467 * if (inner_map1 && inner_map2) { 13468 * wq = bpf_map_lookup_elem(inner_map1); 13469 * if (wq) 13470 * // mismatch would have been allowed 13471 * bpf_wq_init(wq, inner_map2); 13472 * } 13473 * 13474 * Comparing map_ptr is enough to distinguish normal and outer maps. 
13475 */ 13476 if (meta->map.ptr != reg->map_ptr || 13477 meta->map.uid != reg->map_uid) { 13478 if (reg->map_ptr->record->task_work_off >= 0) { 13479 verbose(env, 13480 "bpf_task_work pointer in R2 map_uid=%d doesn't match map pointer in R3 map_uid=%d\n", 13481 meta->map.uid, reg->map_uid); 13482 return -EINVAL; 13483 } 13484 verbose(env, 13485 "workqueue pointer in R1 map_uid=%d doesn't match map pointer in R2 map_uid=%d\n", 13486 meta->map.uid, reg->map_uid); 13487 return -EINVAL; 13488 } 13489 } 13490 meta->map.ptr = reg->map_ptr; 13491 meta->map.uid = reg->map_uid; 13492 fallthrough; 13493 case KF_ARG_PTR_TO_ALLOC_BTF_ID: 13494 case KF_ARG_PTR_TO_BTF_ID: 13495 if (!is_trusted_reg(reg)) { 13496 if (!is_kfunc_rcu(meta)) { 13497 verbose(env, "R%d must be referenced or trusted\n", regno); 13498 return -EINVAL; 13499 } 13500 if (!is_rcu_reg(reg)) { 13501 verbose(env, "R%d must be a rcu pointer\n", regno); 13502 return -EINVAL; 13503 } 13504 } 13505 fallthrough; 13506 case KF_ARG_PTR_TO_CTX: 13507 case KF_ARG_PTR_TO_DYNPTR: 13508 case KF_ARG_PTR_TO_ITER: 13509 case KF_ARG_PTR_TO_LIST_HEAD: 13510 case KF_ARG_PTR_TO_LIST_NODE: 13511 case KF_ARG_PTR_TO_RB_ROOT: 13512 case KF_ARG_PTR_TO_RB_NODE: 13513 case KF_ARG_PTR_TO_MEM: 13514 case KF_ARG_PTR_TO_MEM_SIZE: 13515 case KF_ARG_PTR_TO_CALLBACK: 13516 case KF_ARG_PTR_TO_REFCOUNTED_KPTR: 13517 case KF_ARG_PTR_TO_CONST_STR: 13518 case KF_ARG_PTR_TO_WORKQUEUE: 13519 case KF_ARG_PTR_TO_TIMER: 13520 case KF_ARG_PTR_TO_TASK_WORK: 13521 case KF_ARG_PTR_TO_IRQ_FLAG: 13522 case KF_ARG_PTR_TO_RES_SPIN_LOCK: 13523 break; 13524 default: 13525 verifier_bug(env, "unknown kfunc arg type %d", kf_arg_type); 13526 return -EFAULT; 13527 } 13528 13529 if (is_kfunc_release(meta) && reg->ref_obj_id) 13530 arg_type |= OBJ_RELEASE; 13531 ret = check_func_arg_reg_off(env, reg, regno, arg_type); 13532 if (ret < 0) 13533 return ret; 13534 13535 switch (kf_arg_type) { 13536 case KF_ARG_PTR_TO_CTX: 13537 if (reg->type != PTR_TO_CTX) { 13538 
verbose(env, "arg#%d expected pointer to ctx, but got %s\n", 13539 i, reg_type_str(env, reg->type)); 13540 return -EINVAL; 13541 } 13542 13543 if (meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx]) { 13544 ret = get_kern_ctx_btf_id(&env->log, resolve_prog_type(env->prog)); 13545 if (ret < 0) 13546 return -EINVAL; 13547 meta->ret_btf_id = ret; 13548 } 13549 break; 13550 case KF_ARG_PTR_TO_ALLOC_BTF_ID: 13551 if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC)) { 13552 if (meta->func_id != special_kfunc_list[KF_bpf_obj_drop_impl]) { 13553 verbose(env, "arg#%d expected for bpf_obj_drop_impl()\n", i); 13554 return -EINVAL; 13555 } 13556 } else if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC | MEM_PERCPU)) { 13557 if (meta->func_id != special_kfunc_list[KF_bpf_percpu_obj_drop_impl]) { 13558 verbose(env, "arg#%d expected for bpf_percpu_obj_drop_impl()\n", i); 13559 return -EINVAL; 13560 } 13561 } else { 13562 verbose(env, "arg#%d expected pointer to allocated object\n", i); 13563 return -EINVAL; 13564 } 13565 if (!reg->ref_obj_id) { 13566 verbose(env, "allocated object must be referenced\n"); 13567 return -EINVAL; 13568 } 13569 if (meta->btf == btf_vmlinux) { 13570 meta->arg_btf = reg->btf; 13571 meta->arg_btf_id = reg->btf_id; 13572 } 13573 break; 13574 case KF_ARG_PTR_TO_DYNPTR: 13575 { 13576 enum bpf_arg_type dynptr_arg_type = ARG_PTR_TO_DYNPTR; 13577 int clone_ref_obj_id = 0; 13578 13579 if (reg->type == CONST_PTR_TO_DYNPTR) 13580 dynptr_arg_type |= MEM_RDONLY; 13581 13582 if (is_kfunc_arg_uninit(btf, &args[i])) 13583 dynptr_arg_type |= MEM_UNINIT; 13584 13585 if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_skb]) { 13586 dynptr_arg_type |= DYNPTR_TYPE_SKB; 13587 } else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_xdp]) { 13588 dynptr_arg_type |= DYNPTR_TYPE_XDP; 13589 } else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_skb_meta]) { 13590 dynptr_arg_type |= DYNPTR_TYPE_SKB_META; 13591 } else if (meta->func_id == 
special_kfunc_list[KF_bpf_dynptr_from_file]) { 13592 dynptr_arg_type |= DYNPTR_TYPE_FILE; 13593 } else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_file_discard]) { 13594 dynptr_arg_type |= DYNPTR_TYPE_FILE; 13595 meta->release_regno = regno; 13596 } else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_clone] && 13597 (dynptr_arg_type & MEM_UNINIT)) { 13598 enum bpf_dynptr_type parent_type = meta->initialized_dynptr.type; 13599 13600 if (parent_type == BPF_DYNPTR_TYPE_INVALID) { 13601 verifier_bug(env, "no dynptr type for parent of clone"); 13602 return -EFAULT; 13603 } 13604 13605 dynptr_arg_type |= (unsigned int)get_dynptr_type_flag(parent_type); 13606 clone_ref_obj_id = meta->initialized_dynptr.ref_obj_id; 13607 if (dynptr_type_refcounted(parent_type) && !clone_ref_obj_id) { 13608 verifier_bug(env, "missing ref obj id for parent of clone"); 13609 return -EFAULT; 13610 } 13611 } 13612 13613 ret = process_dynptr_func(env, regno, insn_idx, dynptr_arg_type, clone_ref_obj_id); 13614 if (ret < 0) 13615 return ret; 13616 13617 if (!(dynptr_arg_type & MEM_UNINIT)) { 13618 int id = dynptr_id(env, reg); 13619 13620 if (id < 0) { 13621 verifier_bug(env, "failed to obtain dynptr id"); 13622 return id; 13623 } 13624 meta->initialized_dynptr.id = id; 13625 meta->initialized_dynptr.type = dynptr_get_type(env, reg); 13626 meta->initialized_dynptr.ref_obj_id = dynptr_ref_obj_id(env, reg); 13627 } 13628 13629 break; 13630 } 13631 case KF_ARG_PTR_TO_ITER: 13632 if (meta->func_id == special_kfunc_list[KF_bpf_iter_css_task_new]) { 13633 if (!check_css_task_iter_allowlist(env)) { 13634 verbose(env, "css_task_iter is only allowed in bpf_lsm, bpf_iter and sleepable progs\n"); 13635 return -EINVAL; 13636 } 13637 } 13638 ret = process_iter_arg(env, regno, insn_idx, meta); 13639 if (ret < 0) 13640 return ret; 13641 break; 13642 case KF_ARG_PTR_TO_LIST_HEAD: 13643 if (reg->type != PTR_TO_MAP_VALUE && 13644 reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) { 13645 verbose(env, 
"arg#%d expected pointer to map value or allocated object\n", i); 13646 return -EINVAL; 13647 } 13648 if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC) && !reg->ref_obj_id) { 13649 verbose(env, "allocated object must be referenced\n"); 13650 return -EINVAL; 13651 } 13652 ret = process_kf_arg_ptr_to_list_head(env, reg, regno, meta); 13653 if (ret < 0) 13654 return ret; 13655 break; 13656 case KF_ARG_PTR_TO_RB_ROOT: 13657 if (reg->type != PTR_TO_MAP_VALUE && 13658 reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) { 13659 verbose(env, "arg#%d expected pointer to map value or allocated object\n", i); 13660 return -EINVAL; 13661 } 13662 if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC) && !reg->ref_obj_id) { 13663 verbose(env, "allocated object must be referenced\n"); 13664 return -EINVAL; 13665 } 13666 ret = process_kf_arg_ptr_to_rbtree_root(env, reg, regno, meta); 13667 if (ret < 0) 13668 return ret; 13669 break; 13670 case KF_ARG_PTR_TO_LIST_NODE: 13671 if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) { 13672 verbose(env, "arg#%d expected pointer to allocated object\n", i); 13673 return -EINVAL; 13674 } 13675 if (!reg->ref_obj_id) { 13676 verbose(env, "allocated object must be referenced\n"); 13677 return -EINVAL; 13678 } 13679 ret = process_kf_arg_ptr_to_list_node(env, reg, regno, meta); 13680 if (ret < 0) 13681 return ret; 13682 break; 13683 case KF_ARG_PTR_TO_RB_NODE: 13684 if (meta->func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) { 13685 if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) { 13686 verbose(env, "arg#%d expected pointer to allocated object\n", i); 13687 return -EINVAL; 13688 } 13689 if (!reg->ref_obj_id) { 13690 verbose(env, "allocated object must be referenced\n"); 13691 return -EINVAL; 13692 } 13693 } else { 13694 if (!type_is_non_owning_ref(reg->type) && !reg->ref_obj_id) { 13695 verbose(env, "%s can only take non-owning or refcounted bpf_rb_node pointer\n", func_name); 13696 return -EINVAL; 13697 } 13698 if (in_rbtree_lock_required_cb(env)) { 13699 verbose(env, "%s 
not allowed in rbtree cb\n", func_name); 13700 return -EINVAL; 13701 } 13702 } 13703 13704 ret = process_kf_arg_ptr_to_rbtree_node(env, reg, regno, meta); 13705 if (ret < 0) 13706 return ret; 13707 break; 13708 case KF_ARG_PTR_TO_MAP: 13709 /* If argument has '__map' suffix expect 'struct bpf_map *' */ 13710 ref_id = *reg2btf_ids[CONST_PTR_TO_MAP]; 13711 ref_t = btf_type_by_id(btf_vmlinux, ref_id); 13712 ref_tname = btf_name_by_offset(btf, ref_t->name_off); 13713 fallthrough; 13714 case KF_ARG_PTR_TO_BTF_ID: 13715 /* Only base_type is checked, further checks are done here */ 13716 if ((base_type(reg->type) != PTR_TO_BTF_ID || 13717 (bpf_type_has_unsafe_modifiers(reg->type) && !is_rcu_reg(reg))) && 13718 !reg2btf_ids[base_type(reg->type)]) { 13719 verbose(env, "arg#%d is %s ", i, reg_type_str(env, reg->type)); 13720 verbose(env, "expected %s or socket\n", 13721 reg_type_str(env, base_type(reg->type) | 13722 (type_flag(reg->type) & BPF_REG_TRUSTED_MODIFIERS))); 13723 return -EINVAL; 13724 } 13725 ret = process_kf_arg_ptr_to_btf_id(env, reg, ref_t, ref_tname, ref_id, meta, i); 13726 if (ret < 0) 13727 return ret; 13728 break; 13729 case KF_ARG_PTR_TO_MEM: 13730 resolve_ret = btf_resolve_size(btf, ref_t, &type_size); 13731 if (IS_ERR(resolve_ret)) { 13732 verbose(env, "arg#%d reference type('%s %s') size cannot be determined: %ld\n", 13733 i, btf_type_str(ref_t), ref_tname, PTR_ERR(resolve_ret)); 13734 return -EINVAL; 13735 } 13736 ret = check_mem_reg(env, reg, regno, type_size); 13737 if (ret < 0) 13738 return ret; 13739 break; 13740 case KF_ARG_PTR_TO_MEM_SIZE: 13741 { 13742 struct bpf_reg_state *buff_reg = ®s[regno]; 13743 const struct btf_param *buff_arg = &args[i]; 13744 struct bpf_reg_state *size_reg = ®s[regno + 1]; 13745 const struct btf_param *size_arg = &args[i + 1]; 13746 13747 if (!register_is_null(buff_reg) || !is_kfunc_arg_nullable(meta->btf, buff_arg)) { 13748 ret = check_kfunc_mem_size_reg(env, size_reg, regno + 1); 13749 if (ret < 0) { 13750 
verbose(env, "arg#%d arg#%d memory, len pair leads to invalid memory access\n", i, i + 1); 13751 return ret; 13752 } 13753 } 13754 13755 if (is_kfunc_arg_const_mem_size(meta->btf, size_arg, size_reg)) { 13756 if (meta->arg_constant.found) { 13757 verifier_bug(env, "only one constant argument permitted"); 13758 return -EFAULT; 13759 } 13760 if (!tnum_is_const(size_reg->var_off)) { 13761 verbose(env, "R%d must be a known constant\n", regno + 1); 13762 return -EINVAL; 13763 } 13764 meta->arg_constant.found = true; 13765 meta->arg_constant.value = size_reg->var_off.value; 13766 } 13767 13768 /* Skip next '__sz' or '__szk' argument */ 13769 i++; 13770 break; 13771 } 13772 case KF_ARG_PTR_TO_CALLBACK: 13773 if (reg->type != PTR_TO_FUNC) { 13774 verbose(env, "arg%d expected pointer to func\n", i); 13775 return -EINVAL; 13776 } 13777 meta->subprogno = reg->subprogno; 13778 break; 13779 case KF_ARG_PTR_TO_REFCOUNTED_KPTR: 13780 if (!type_is_ptr_alloc_obj(reg->type)) { 13781 verbose(env, "arg#%d is neither owning or non-owning ref\n", i); 13782 return -EINVAL; 13783 } 13784 if (!type_is_non_owning_ref(reg->type)) 13785 meta->arg_owning_ref = true; 13786 13787 rec = reg_btf_record(reg); 13788 if (!rec) { 13789 verifier_bug(env, "Couldn't find btf_record"); 13790 return -EFAULT; 13791 } 13792 13793 if (rec->refcount_off < 0) { 13794 verbose(env, "arg#%d doesn't point to a type with bpf_refcount field\n", i); 13795 return -EINVAL; 13796 } 13797 13798 meta->arg_btf = reg->btf; 13799 meta->arg_btf_id = reg->btf_id; 13800 break; 13801 case KF_ARG_PTR_TO_CONST_STR: 13802 if (reg->type != PTR_TO_MAP_VALUE) { 13803 verbose(env, "arg#%d doesn't point to a const string\n", i); 13804 return -EINVAL; 13805 } 13806 ret = check_reg_const_str(env, reg, regno); 13807 if (ret) 13808 return ret; 13809 break; 13810 case KF_ARG_PTR_TO_WORKQUEUE: 13811 if (reg->type != PTR_TO_MAP_VALUE) { 13812 verbose(env, "arg#%d doesn't point to a map value\n", i); 13813 return -EINVAL; 13814 } 13815 ret = 
check_map_field_pointer(env, regno, BPF_WORKQUEUE, &meta->map); 13816 if (ret < 0) 13817 return ret; 13818 break; 13819 case KF_ARG_PTR_TO_TIMER: 13820 if (reg->type != PTR_TO_MAP_VALUE) { 13821 verbose(env, "arg#%d doesn't point to a map value\n", i); 13822 return -EINVAL; 13823 } 13824 ret = process_timer_kfunc(env, regno, meta); 13825 if (ret < 0) 13826 return ret; 13827 break; 13828 case KF_ARG_PTR_TO_TASK_WORK: 13829 if (reg->type != PTR_TO_MAP_VALUE) { 13830 verbose(env, "arg#%d doesn't point to a map value\n", i); 13831 return -EINVAL; 13832 } 13833 ret = check_map_field_pointer(env, regno, BPF_TASK_WORK, &meta->map); 13834 if (ret < 0) 13835 return ret; 13836 break; 13837 case KF_ARG_PTR_TO_IRQ_FLAG: 13838 if (reg->type != PTR_TO_STACK) { 13839 verbose(env, "arg#%d doesn't point to an irq flag on stack\n", i); 13840 return -EINVAL; 13841 } 13842 ret = process_irq_flag(env, regno, meta); 13843 if (ret < 0) 13844 return ret; 13845 break; 13846 case KF_ARG_PTR_TO_RES_SPIN_LOCK: 13847 { 13848 int flags = PROCESS_RES_LOCK; 13849 13850 if (reg->type != PTR_TO_MAP_VALUE && reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) { 13851 verbose(env, "arg#%d doesn't point to map value or allocated object\n", i); 13852 return -EINVAL; 13853 } 13854 13855 if (!is_bpf_res_spin_lock_kfunc(meta->func_id)) 13856 return -EFAULT; 13857 if (meta->func_id == special_kfunc_list[KF_bpf_res_spin_lock] || 13858 meta->func_id == special_kfunc_list[KF_bpf_res_spin_lock_irqsave]) 13859 flags |= PROCESS_SPIN_LOCK; 13860 if (meta->func_id == special_kfunc_list[KF_bpf_res_spin_lock_irqsave] || 13861 meta->func_id == special_kfunc_list[KF_bpf_res_spin_unlock_irqrestore]) 13862 flags |= PROCESS_LOCK_IRQ; 13863 ret = process_spin_lock(env, regno, flags); 13864 if (ret < 0) 13865 return ret; 13866 break; 13867 } 13868 } 13869 } 13870 13871 if (is_kfunc_release(meta) && !meta->release_regno) { 13872 verbose(env, "release kernel function %s expects refcounted PTR_TO_BTF_ID\n", 13873 func_name); 13874 
return -EINVAL; 13875 } 13876 13877 return 0; 13878 } 13879 13880 static int fetch_kfunc_arg_meta(struct bpf_verifier_env *env, 13881 s32 func_id, 13882 s16 offset, 13883 struct bpf_kfunc_call_arg_meta *meta) 13884 { 13885 struct bpf_kfunc_meta kfunc; 13886 int err; 13887 13888 err = fetch_kfunc_meta(env, func_id, offset, &kfunc); 13889 if (err) 13890 return err; 13891 13892 memset(meta, 0, sizeof(*meta)); 13893 meta->btf = kfunc.btf; 13894 meta->func_id = kfunc.id; 13895 meta->func_proto = kfunc.proto; 13896 meta->func_name = kfunc.name; 13897 13898 if (!kfunc.flags || !btf_kfunc_is_allowed(kfunc.btf, kfunc.id, env->prog)) 13899 return -EACCES; 13900 13901 meta->kfunc_flags = *kfunc.flags; 13902 13903 return 0; 13904 } 13905 13906 /* check special kfuncs and return: 13907 * 1 - not fall-through to 'else' branch, continue verification 13908 * 0 - fall-through to 'else' branch 13909 * < 0 - not fall-through to 'else' branch, return error 13910 */ 13911 static int check_special_kfunc(struct bpf_verifier_env *env, struct bpf_kfunc_call_arg_meta *meta, 13912 struct bpf_reg_state *regs, struct bpf_insn_aux_data *insn_aux, 13913 const struct btf_type *ptr_type, struct btf *desc_btf) 13914 { 13915 const struct btf_type *ret_t; 13916 int err = 0; 13917 13918 if (meta->btf != btf_vmlinux) 13919 return 0; 13920 13921 if (meta->func_id == special_kfunc_list[KF_bpf_obj_new_impl] || 13922 meta->func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) { 13923 struct btf_struct_meta *struct_meta; 13924 struct btf *ret_btf; 13925 u32 ret_btf_id; 13926 13927 if (meta->func_id == special_kfunc_list[KF_bpf_obj_new_impl] && !bpf_global_ma_set) 13928 return -ENOMEM; 13929 13930 if (((u64)(u32)meta->arg_constant.value) != meta->arg_constant.value) { 13931 verbose(env, "local type ID argument must be in range [0, U32_MAX]\n"); 13932 return -EINVAL; 13933 } 13934 13935 ret_btf = env->prog->aux->btf; 13936 ret_btf_id = meta->arg_constant.value; 13937 13938 /* This may be NULL due to user 
not supplying a BTF */ 13939 if (!ret_btf) { 13940 verbose(env, "bpf_obj_new/bpf_percpu_obj_new requires prog BTF\n"); 13941 return -EINVAL; 13942 } 13943 13944 ret_t = btf_type_by_id(ret_btf, ret_btf_id); 13945 if (!ret_t || !__btf_type_is_struct(ret_t)) { 13946 verbose(env, "bpf_obj_new/bpf_percpu_obj_new type ID argument must be of a struct\n"); 13947 return -EINVAL; 13948 } 13949 13950 if (meta->func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) { 13951 if (ret_t->size > BPF_GLOBAL_PERCPU_MA_MAX_SIZE) { 13952 verbose(env, "bpf_percpu_obj_new type size (%d) is greater than %d\n", 13953 ret_t->size, BPF_GLOBAL_PERCPU_MA_MAX_SIZE); 13954 return -EINVAL; 13955 } 13956 13957 if (!bpf_global_percpu_ma_set) { 13958 mutex_lock(&bpf_percpu_ma_lock); 13959 if (!bpf_global_percpu_ma_set) { 13960 /* Charge memory allocated with bpf_global_percpu_ma to 13961 * root memcg. The obj_cgroup for root memcg is NULL. 13962 */ 13963 err = bpf_mem_alloc_percpu_init(&bpf_global_percpu_ma, NULL); 13964 if (!err) 13965 bpf_global_percpu_ma_set = true; 13966 } 13967 mutex_unlock(&bpf_percpu_ma_lock); 13968 if (err) 13969 return err; 13970 } 13971 13972 mutex_lock(&bpf_percpu_ma_lock); 13973 err = bpf_mem_alloc_percpu_unit_init(&bpf_global_percpu_ma, ret_t->size); 13974 mutex_unlock(&bpf_percpu_ma_lock); 13975 if (err) 13976 return err; 13977 } 13978 13979 struct_meta = btf_find_struct_meta(ret_btf, ret_btf_id); 13980 if (meta->func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) { 13981 if (!__btf_type_is_scalar_struct(env, ret_btf, ret_t, 0)) { 13982 verbose(env, "bpf_percpu_obj_new type ID argument must be of a struct of scalars\n"); 13983 return -EINVAL; 13984 } 13985 13986 if (struct_meta) { 13987 verbose(env, "bpf_percpu_obj_new type ID argument must not contain special fields\n"); 13988 return -EINVAL; 13989 } 13990 } 13991 13992 mark_reg_known_zero(env, regs, BPF_REG_0); 13993 regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC; 13994 regs[BPF_REG_0].btf = ret_btf; 
13995 regs[BPF_REG_0].btf_id = ret_btf_id; 13996 if (meta->func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) 13997 regs[BPF_REG_0].type |= MEM_PERCPU; 13998 13999 insn_aux->obj_new_size = ret_t->size; 14000 insn_aux->kptr_struct_meta = struct_meta; 14001 } else if (meta->func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl]) { 14002 mark_reg_known_zero(env, regs, BPF_REG_0); 14003 regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC; 14004 regs[BPF_REG_0].btf = meta->arg_btf; 14005 regs[BPF_REG_0].btf_id = meta->arg_btf_id; 14006 14007 insn_aux->kptr_struct_meta = 14008 btf_find_struct_meta(meta->arg_btf, 14009 meta->arg_btf_id); 14010 } else if (is_list_node_type(ptr_type)) { 14011 struct btf_field *field = meta->arg_list_head.field; 14012 14013 mark_reg_graph_node(regs, BPF_REG_0, &field->graph_root); 14014 } else if (is_rbtree_node_type(ptr_type)) { 14015 struct btf_field *field = meta->arg_rbtree_root.field; 14016 14017 mark_reg_graph_node(regs, BPF_REG_0, &field->graph_root); 14018 } else if (meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx]) { 14019 mark_reg_known_zero(env, regs, BPF_REG_0); 14020 regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_TRUSTED; 14021 regs[BPF_REG_0].btf = desc_btf; 14022 regs[BPF_REG_0].btf_id = meta->ret_btf_id; 14023 } else if (meta->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) { 14024 ret_t = btf_type_by_id(desc_btf, meta->arg_constant.value); 14025 if (!ret_t) { 14026 verbose(env, "Unknown type ID %lld passed to kfunc bpf_rdonly_cast\n", 14027 meta->arg_constant.value); 14028 return -EINVAL; 14029 } else if (btf_type_is_struct(ret_t)) { 14030 mark_reg_known_zero(env, regs, BPF_REG_0); 14031 regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_UNTRUSTED; 14032 regs[BPF_REG_0].btf = desc_btf; 14033 regs[BPF_REG_0].btf_id = meta->arg_constant.value; 14034 } else if (btf_type_is_void(ret_t)) { 14035 mark_reg_known_zero(env, regs, BPF_REG_0); 14036 regs[BPF_REG_0].type = PTR_TO_MEM | MEM_RDONLY | PTR_UNTRUSTED; 14037 
regs[BPF_REG_0].mem_size = 0; 14038 } else { 14039 verbose(env, 14040 "kfunc bpf_rdonly_cast type ID argument must be of a struct or void\n"); 14041 return -EINVAL; 14042 } 14043 } else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_slice] || 14044 meta->func_id == special_kfunc_list[KF_bpf_dynptr_slice_rdwr]) { 14045 enum bpf_type_flag type_flag = get_dynptr_type_flag(meta->initialized_dynptr.type); 14046 14047 mark_reg_known_zero(env, regs, BPF_REG_0); 14048 14049 if (!meta->arg_constant.found) { 14050 verifier_bug(env, "bpf_dynptr_slice(_rdwr) no constant size"); 14051 return -EFAULT; 14052 } 14053 14054 regs[BPF_REG_0].mem_size = meta->arg_constant.value; 14055 14056 /* PTR_MAYBE_NULL will be added when is_kfunc_ret_null is checked */ 14057 regs[BPF_REG_0].type = PTR_TO_MEM | type_flag; 14058 14059 if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_slice]) { 14060 regs[BPF_REG_0].type |= MEM_RDONLY; 14061 } else { 14062 /* this will set env->seen_direct_write to true */ 14063 if (!may_access_direct_pkt_data(env, NULL, BPF_WRITE)) { 14064 verbose(env, "the prog does not allow writes to packet data\n"); 14065 return -EINVAL; 14066 } 14067 } 14068 14069 if (!meta->initialized_dynptr.id) { 14070 verifier_bug(env, "no dynptr id"); 14071 return -EFAULT; 14072 } 14073 regs[BPF_REG_0].dynptr_id = meta->initialized_dynptr.id; 14074 14075 /* we don't need to set BPF_REG_0's ref obj id 14076 * because packet slices are not refcounted (see 14077 * dynptr_type_refcounted) 14078 */ 14079 } else { 14080 return 0; 14081 } 14082 14083 return 1; 14084 } 14085 14086 static int check_return_code(struct bpf_verifier_env *env, int regno, const char *reg_name); 14087 14088 static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, 14089 int *insn_idx_p) 14090 { 14091 bool sleepable, rcu_lock, rcu_unlock, preempt_disable, preempt_enable; 14092 u32 i, nargs, ptr_type_id, release_ref_obj_id; 14093 struct bpf_reg_state *regs = cur_regs(env); 14094 const 
char *func_name, *ptr_type_name; 14095 const struct btf_type *t, *ptr_type; 14096 struct bpf_kfunc_call_arg_meta meta; 14097 struct bpf_insn_aux_data *insn_aux; 14098 int err, insn_idx = *insn_idx_p; 14099 const struct btf_param *args; 14100 struct btf *desc_btf; 14101 14102 /* skip for now, but return error when we find this in fixup_kfunc_call */ 14103 if (!insn->imm) 14104 return 0; 14105 14106 err = fetch_kfunc_arg_meta(env, insn->imm, insn->off, &meta); 14107 if (err == -EACCES && meta.func_name) 14108 verbose(env, "calling kernel function %s is not allowed\n", meta.func_name); 14109 if (err) 14110 return err; 14111 desc_btf = meta.btf; 14112 func_name = meta.func_name; 14113 insn_aux = &env->insn_aux_data[insn_idx]; 14114 14115 insn_aux->is_iter_next = is_iter_next_kfunc(&meta); 14116 14117 if (!insn->off && 14118 (insn->imm == special_kfunc_list[KF_bpf_res_spin_lock] || 14119 insn->imm == special_kfunc_list[KF_bpf_res_spin_lock_irqsave])) { 14120 struct bpf_verifier_state *branch; 14121 struct bpf_reg_state *regs; 14122 14123 branch = push_stack(env, env->insn_idx + 1, env->insn_idx, false); 14124 if (IS_ERR(branch)) { 14125 verbose(env, "failed to push state for failed lock acquisition\n"); 14126 return PTR_ERR(branch); 14127 } 14128 14129 regs = branch->frame[branch->curframe]->regs; 14130 14131 /* Clear r0-r5 registers in forked state */ 14132 for (i = 0; i < CALLER_SAVED_REGS; i++) 14133 mark_reg_not_init(env, regs, caller_saved[i]); 14134 14135 mark_reg_unknown(env, regs, BPF_REG_0); 14136 err = __mark_reg_s32_range(env, regs, BPF_REG_0, -MAX_ERRNO, -1); 14137 if (err) { 14138 verbose(env, "failed to mark s32 range for retval in forked state for lock\n"); 14139 return err; 14140 } 14141 __mark_btf_func_reg_size(env, regs, BPF_REG_0, sizeof(u32)); 14142 } else if (!insn->off && insn->imm == special_kfunc_list[KF___bpf_trap]) { 14143 verbose(env, "unexpected __bpf_trap() due to uninitialized variable?\n"); 14144 return -EFAULT; 14145 } 14146 14147 if 
(is_kfunc_destructive(&meta) && !capable(CAP_SYS_BOOT)) { 14148 verbose(env, "destructive kfunc calls require CAP_SYS_BOOT capability\n"); 14149 return -EACCES; 14150 } 14151 14152 sleepable = is_kfunc_sleepable(&meta); 14153 if (sleepable && !in_sleepable(env)) { 14154 verbose(env, "program must be sleepable to call sleepable kfunc %s\n", func_name); 14155 return -EACCES; 14156 } 14157 14158 /* Track non-sleepable context for kfuncs, same as for helpers. */ 14159 if (!in_sleepable_context(env)) 14160 insn_aux->non_sleepable = true; 14161 14162 /* Check the arguments */ 14163 err = check_kfunc_args(env, &meta, insn_idx); 14164 if (err < 0) 14165 return err; 14166 14167 if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) { 14168 err = push_callback_call(env, insn, insn_idx, meta.subprogno, 14169 set_rbtree_add_callback_state); 14170 if (err) { 14171 verbose(env, "kfunc %s#%d failed callback verification\n", 14172 func_name, meta.func_id); 14173 return err; 14174 } 14175 } 14176 14177 if (meta.func_id == special_kfunc_list[KF_bpf_session_cookie]) { 14178 meta.r0_size = sizeof(u64); 14179 meta.r0_rdonly = false; 14180 } 14181 14182 if (is_bpf_wq_set_callback_kfunc(meta.func_id)) { 14183 err = push_callback_call(env, insn, insn_idx, meta.subprogno, 14184 set_timer_callback_state); 14185 if (err) { 14186 verbose(env, "kfunc %s#%d failed callback verification\n", 14187 func_name, meta.func_id); 14188 return err; 14189 } 14190 } 14191 14192 if (is_task_work_add_kfunc(meta.func_id)) { 14193 err = push_callback_call(env, insn, insn_idx, meta.subprogno, 14194 set_task_work_schedule_callback_state); 14195 if (err) { 14196 verbose(env, "kfunc %s#%d failed callback verification\n", 14197 func_name, meta.func_id); 14198 return err; 14199 } 14200 } 14201 14202 rcu_lock = is_kfunc_bpf_rcu_read_lock(&meta); 14203 rcu_unlock = is_kfunc_bpf_rcu_read_unlock(&meta); 14204 14205 preempt_disable = is_kfunc_bpf_preempt_disable(&meta); 14206 preempt_enable = 
is_kfunc_bpf_preempt_enable(&meta); 14207 14208 if (rcu_lock) { 14209 env->cur_state->active_rcu_locks++; 14210 } else if (rcu_unlock) { 14211 struct bpf_func_state *state; 14212 struct bpf_reg_state *reg; 14213 u32 clear_mask = (1 << STACK_SPILL) | (1 << STACK_ITER); 14214 14215 if (env->cur_state->active_rcu_locks == 0) { 14216 verbose(env, "unmatched rcu read unlock (kernel function %s)\n", func_name); 14217 return -EINVAL; 14218 } 14219 if (--env->cur_state->active_rcu_locks == 0) { 14220 bpf_for_each_reg_in_vstate_mask(env->cur_state, state, reg, clear_mask, ({ 14221 if (reg->type & MEM_RCU) { 14222 reg->type &= ~(MEM_RCU | PTR_MAYBE_NULL); 14223 reg->type |= PTR_UNTRUSTED; 14224 } 14225 })); 14226 } 14227 } else if (sleepable && env->cur_state->active_rcu_locks) { 14228 verbose(env, "kernel func %s is sleepable within rcu_read_lock region\n", func_name); 14229 return -EACCES; 14230 } 14231 14232 if (in_rbtree_lock_required_cb(env) && (rcu_lock || rcu_unlock)) { 14233 verbose(env, "Calling bpf_rcu_read_{lock,unlock} in unnecessary rbtree callback\n"); 14234 return -EACCES; 14235 } 14236 14237 if (env->cur_state->active_preempt_locks) { 14238 if (preempt_disable) { 14239 env->cur_state->active_preempt_locks++; 14240 } else if (preempt_enable) { 14241 env->cur_state->active_preempt_locks--; 14242 } else if (sleepable) { 14243 verbose(env, "kernel func %s is sleepable within non-preemptible region\n", func_name); 14244 return -EACCES; 14245 } 14246 } else if (preempt_disable) { 14247 env->cur_state->active_preempt_locks++; 14248 } else if (preempt_enable) { 14249 verbose(env, "unmatched attempt to enable preemption (kernel function %s)\n", func_name); 14250 return -EINVAL; 14251 } 14252 14253 if (env->cur_state->active_irq_id && sleepable) { 14254 verbose(env, "kernel func %s is sleepable within IRQ-disabled region\n", func_name); 14255 return -EACCES; 14256 } 14257 14258 if (is_kfunc_rcu_protected(&meta) && !in_rcu_cs(env)) { 14259 verbose(env, "kernel func %s 
requires RCU critical section protection\n", func_name); 14260 return -EACCES; 14261 } 14262 14263 /* In case of release function, we get register number of refcounted 14264 * PTR_TO_BTF_ID in bpf_kfunc_arg_meta, do the release now. 14265 */ 14266 if (meta.release_regno) { 14267 struct bpf_reg_state *reg = ®s[meta.release_regno]; 14268 14269 if (meta.initialized_dynptr.ref_obj_id) { 14270 err = unmark_stack_slots_dynptr(env, reg); 14271 } else { 14272 err = release_reference(env, reg->ref_obj_id); 14273 if (err) 14274 verbose(env, "kfunc %s#%d reference has not been acquired before\n", 14275 func_name, meta.func_id); 14276 } 14277 if (err) 14278 return err; 14279 } 14280 14281 if (meta.func_id == special_kfunc_list[KF_bpf_list_push_front_impl] || 14282 meta.func_id == special_kfunc_list[KF_bpf_list_push_back_impl] || 14283 meta.func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) { 14284 release_ref_obj_id = regs[BPF_REG_2].ref_obj_id; 14285 insn_aux->insert_off = regs[BPF_REG_2].off; 14286 insn_aux->kptr_struct_meta = btf_find_struct_meta(meta.arg_btf, meta.arg_btf_id); 14287 err = ref_convert_owning_non_owning(env, release_ref_obj_id); 14288 if (err) { 14289 verbose(env, "kfunc %s#%d conversion of owning ref to non-owning failed\n", 14290 func_name, meta.func_id); 14291 return err; 14292 } 14293 14294 err = release_reference(env, release_ref_obj_id); 14295 if (err) { 14296 verbose(env, "kfunc %s#%d reference has not been acquired before\n", 14297 func_name, meta.func_id); 14298 return err; 14299 } 14300 } 14301 14302 if (meta.func_id == special_kfunc_list[KF_bpf_throw]) { 14303 if (!bpf_jit_supports_exceptions()) { 14304 verbose(env, "JIT does not support calling kfunc %s#%d\n", 14305 func_name, meta.func_id); 14306 return -ENOTSUPP; 14307 } 14308 env->seen_exception = true; 14309 14310 /* In the case of the default callback, the cookie value passed 14311 * to bpf_throw becomes the return value of the program. 
14312 */ 14313 if (!env->exception_callback_subprog) { 14314 err = check_return_code(env, BPF_REG_1, "R1"); 14315 if (err < 0) 14316 return err; 14317 } 14318 } 14319 14320 for (i = 0; i < CALLER_SAVED_REGS; i++) { 14321 u32 regno = caller_saved[i]; 14322 14323 mark_reg_not_init(env, regs, regno); 14324 regs[regno].subreg_def = DEF_NOT_SUBREG; 14325 } 14326 14327 /* Check return type */ 14328 t = btf_type_skip_modifiers(desc_btf, meta.func_proto->type, NULL); 14329 14330 if (is_kfunc_acquire(&meta) && !btf_type_is_struct_ptr(meta.btf, t)) { 14331 /* Only exception is bpf_obj_new_impl */ 14332 if (meta.btf != btf_vmlinux || 14333 (meta.func_id != special_kfunc_list[KF_bpf_obj_new_impl] && 14334 meta.func_id != special_kfunc_list[KF_bpf_percpu_obj_new_impl] && 14335 meta.func_id != special_kfunc_list[KF_bpf_refcount_acquire_impl])) { 14336 verbose(env, "acquire kernel function does not return PTR_TO_BTF_ID\n"); 14337 return -EINVAL; 14338 } 14339 } 14340 14341 if (btf_type_is_scalar(t)) { 14342 mark_reg_unknown(env, regs, BPF_REG_0); 14343 if (meta.btf == btf_vmlinux && (meta.func_id == special_kfunc_list[KF_bpf_res_spin_lock] || 14344 meta.func_id == special_kfunc_list[KF_bpf_res_spin_lock_irqsave])) 14345 __mark_reg_const_zero(env, ®s[BPF_REG_0]); 14346 mark_btf_func_reg_size(env, BPF_REG_0, t->size); 14347 } else if (btf_type_is_ptr(t)) { 14348 ptr_type = btf_type_skip_modifiers(desc_btf, t->type, &ptr_type_id); 14349 err = check_special_kfunc(env, &meta, regs, insn_aux, ptr_type, desc_btf); 14350 if (err) { 14351 if (err < 0) 14352 return err; 14353 } else if (btf_type_is_void(ptr_type)) { 14354 /* kfunc returning 'void *' is equivalent to returning scalar */ 14355 mark_reg_unknown(env, regs, BPF_REG_0); 14356 } else if (!__btf_type_is_struct(ptr_type)) { 14357 if (!meta.r0_size) { 14358 __u32 sz; 14359 14360 if (!IS_ERR(btf_resolve_size(desc_btf, ptr_type, &sz))) { 14361 meta.r0_size = sz; 14362 meta.r0_rdonly = true; 14363 } 14364 } 14365 if (!meta.r0_size) { 
14366 ptr_type_name = btf_name_by_offset(desc_btf, 14367 ptr_type->name_off); 14368 verbose(env, 14369 "kernel function %s returns pointer type %s %s is not supported\n", 14370 func_name, 14371 btf_type_str(ptr_type), 14372 ptr_type_name); 14373 return -EINVAL; 14374 } 14375 14376 mark_reg_known_zero(env, regs, BPF_REG_0); 14377 regs[BPF_REG_0].type = PTR_TO_MEM; 14378 regs[BPF_REG_0].mem_size = meta.r0_size; 14379 14380 if (meta.r0_rdonly) 14381 regs[BPF_REG_0].type |= MEM_RDONLY; 14382 14383 /* Ensures we don't access the memory after a release_reference() */ 14384 if (meta.ref_obj_id) 14385 regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id; 14386 14387 if (is_kfunc_rcu_protected(&meta)) 14388 regs[BPF_REG_0].type |= MEM_RCU; 14389 } else { 14390 enum bpf_reg_type type = PTR_TO_BTF_ID; 14391 14392 if (meta.func_id == special_kfunc_list[KF_bpf_get_kmem_cache]) 14393 type |= PTR_UNTRUSTED; 14394 else if (is_kfunc_rcu_protected(&meta) || 14395 (is_iter_next_kfunc(&meta) && 14396 (get_iter_from_state(env->cur_state, &meta) 14397 ->type & MEM_RCU))) { 14398 /* 14399 * If the iterator's constructor (the _new 14400 * function e.g., bpf_iter_task_new) has been 14401 * annotated with BPF kfunc flag 14402 * KF_RCU_PROTECTED and was called within a RCU 14403 * read-side critical section, also propagate 14404 * the MEM_RCU flag to the pointer returned from 14405 * the iterator's next function (e.g., 14406 * bpf_iter_task_next). 14407 */ 14408 type |= MEM_RCU; 14409 } else { 14410 /* 14411 * Any PTR_TO_BTF_ID that is returned from a BPF 14412 * kfunc should by default be treated as 14413 * implicitly trusted. 
14414 */ 14415 type |= PTR_TRUSTED; 14416 } 14417 14418 mark_reg_known_zero(env, regs, BPF_REG_0); 14419 regs[BPF_REG_0].btf = desc_btf; 14420 regs[BPF_REG_0].type = type; 14421 regs[BPF_REG_0].btf_id = ptr_type_id; 14422 } 14423 14424 if (is_kfunc_ret_null(&meta)) { 14425 regs[BPF_REG_0].type |= PTR_MAYBE_NULL; 14426 /* For mark_ptr_or_null_reg, see 93c230e3f5bd6 */ 14427 regs[BPF_REG_0].id = ++env->id_gen; 14428 } 14429 mark_btf_func_reg_size(env, BPF_REG_0, sizeof(void *)); 14430 if (is_kfunc_acquire(&meta)) { 14431 int id = acquire_reference(env, insn_idx); 14432 14433 if (id < 0) 14434 return id; 14435 if (is_kfunc_ret_null(&meta)) 14436 regs[BPF_REG_0].id = id; 14437 regs[BPF_REG_0].ref_obj_id = id; 14438 } else if (is_rbtree_node_type(ptr_type) || is_list_node_type(ptr_type)) { 14439 ref_set_non_owning(env, ®s[BPF_REG_0]); 14440 } 14441 14442 if (reg_may_point_to_spin_lock(®s[BPF_REG_0]) && !regs[BPF_REG_0].id) 14443 regs[BPF_REG_0].id = ++env->id_gen; 14444 } else if (btf_type_is_void(t)) { 14445 if (meta.btf == btf_vmlinux) { 14446 if (meta.func_id == special_kfunc_list[KF_bpf_obj_drop_impl] || 14447 meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_drop_impl]) { 14448 insn_aux->kptr_struct_meta = 14449 btf_find_struct_meta(meta.arg_btf, 14450 meta.arg_btf_id); 14451 } 14452 } 14453 } 14454 14455 if (is_kfunc_pkt_changing(&meta)) 14456 clear_all_pkt_pointers(env); 14457 14458 nargs = btf_type_vlen(meta.func_proto); 14459 args = (const struct btf_param *)(meta.func_proto + 1); 14460 for (i = 0; i < nargs; i++) { 14461 u32 regno = i + 1; 14462 14463 t = btf_type_skip_modifiers(desc_btf, args[i].type, NULL); 14464 if (btf_type_is_ptr(t)) 14465 mark_btf_func_reg_size(env, regno, sizeof(void *)); 14466 else 14467 /* scalar. 
ensured by btf_check_kfunc_arg_match() */
			mark_btf_func_reg_size(env, regno, t->size);
	}

	if (is_iter_next_kfunc(&meta)) {
		err = process_iter_next_call(env, insn_idx, &meta);
		if (err)
			return err;
	}

	if (meta.func_id == special_kfunc_list[KF_bpf_session_cookie])
		env->prog->call_session_cookie = true;

	return 0;
}

/* Check that the offsets carried by @reg are small enough to take part in
 * pointer arithmetic. @type is only used to name the pointer type in the
 * log message.
 *
 * Logs a message and returns false if
 *  - var_off is a known constant at or beyond +/-BPF_MAX_VAR_OFF,
 *  - the fixed offset reg->off is at or beyond +/-BPF_MAX_VAR_OFF,
 *  - smin_value is S64_MIN (unbounded minimum), or
 *  - smin_value is at or beyond +/-BPF_MAX_VAR_OFF.
 * Returns true otherwise.
 */
static bool check_reg_sane_offset(struct bpf_verifier_env *env,
				  const struct bpf_reg_state *reg,
				  enum bpf_reg_type type)
{
	bool known = tnum_is_const(reg->var_off);
	s64 val = reg->var_off.value;
	s64 smin = reg->smin_value;

	if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
		verbose(env, "math between %s pointer and %lld is not allowed\n",
			reg_type_str(env, type), val);
		return false;
	}

	if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) {
		verbose(env, "%s pointer offset %d is not allowed\n",
			reg_type_str(env, type), reg->off);
		return false;
	}

	if (smin == S64_MIN) {
		verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n",
			reg_type_str(env, type));
		return false;
	}

	if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
		verbose(env, "value %lld makes %s pointer be out of bounds\n",
			smin, reg_type_str(env, type));
		return false;
	}

	return true;
}

/* Negative failure codes produced by retrieve_ptr_limit(),
 * update_alu_sanitation_state() and sanitize_ptr_alu(). sanitize_err()
 * maps each of them to a log message and an errno.
 */
enum {
	REASON_BOUNDS	= -1,
	REASON_TYPE	= -2,
	REASON_PATHS	= -3,
	REASON_LIMIT	= -4,
	REASON_STACK	= -5,
};

/* Compute the ALU masking limit for @ptr_reg and store it in *@alu_limit.
 *
 * Only PTR_TO_STACK and PTR_TO_MAP_VALUE are handled; any other type yields
 * REASON_TYPE. REASON_LIMIT is returned when the computed limit is not
 * strictly below the maximum for the type (MAX_BPF_STACK + @mask_to_left for
 * the stack, the map's value_size for map values). Returns 0 on success;
 * *@alu_limit is only written on success.
 */
static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
			      u32 *alu_limit, bool mask_to_left)
{
	u32 max = 0, ptr_limit = 0;

	switch (ptr_reg->type) {
	case PTR_TO_STACK:
		/* Offset 0 is out-of-bounds, but acceptable start for the
		 * left direction, see BPF_REG_FP. Also, unknown scalar
		 * offset where we would need to deal with min/max bounds is
		 * currently prohibited for unprivileged.
		 */
		max = MAX_BPF_STACK + mask_to_left;
		ptr_limit = -(ptr_reg->var_off.value + ptr_reg->off);
		break;
	case PTR_TO_MAP_VALUE:
		max = ptr_reg->map_ptr->value_size;
		/* Left direction uses the signed minimum, right direction the
		 * unsigned maximum; the fixed offset is added in both cases.
		 */
		ptr_limit = (mask_to_left ?
			     ptr_reg->smin_value :
			     ptr_reg->umax_value) + ptr_reg->off;
		break;
	default:
		return REASON_TYPE;
	}

	if (ptr_limit >= max)
		return REASON_LIMIT;
	*alu_limit = ptr_limit;
	return 0;
}

/* ALU sanitation is skipped when env->bypass_spec_v1 is set, when the
 * instruction takes an immediate source operand (BPF_K), or when the aux
 * data of the current instruction has 'nospec' set.
 */
static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env,
				    const struct bpf_insn *insn)
{
	return env->bypass_spec_v1 ||
		BPF_SRC(insn->code) == BPF_K ||
		cur_aux(env)->nospec;
}

/* Record @alu_state / @alu_limit in @aux.
 *
 * Returns REASON_PATHS without modifying @aux if a non-zero alu_state was
 * already recorded and either value differs from the new one; 0 otherwise.
 */
static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux,
				       u32 alu_state, u32 alu_limit)
{
	/* If we arrived here from different branches with different
	 * state or limits to sanitize, then this won't work.
	 */
	if (aux->alu_state &&
	    (aux->alu_state != alu_state ||
	     aux->alu_limit != alu_limit))
		return REASON_PATHS;

	/* Corresponding fixup done in do_misc_fixups(). */
	aux->alu_state = alu_state;
	aux->alu_limit = alu_limit;
	return 0;
}

/* Mark the current instruction as BPF_ALU_NON_POINTER (with limit 0) unless
 * sanitation can be skipped. Returns 0 or REASON_PATHS, as reported by
 * update_alu_sanitation_state().
 */
static int sanitize_val_alu(struct bpf_verifier_env *env,
			    struct bpf_insn *insn)
{
	struct bpf_insn_aux_data *aux = cur_aux(env);

	if (can_skip_alu_sanitation(env, insn))
		return 0;

	return update_alu_sanitation_state(aux, BPF_ALU_NON_POINTER, 0);
}

/* Only BPF_ADD and BPF_SUB are subject to ALU sanitation. */
static bool sanitize_needed(u8 opcode)
{
	return opcode == BPF_ADD || opcode == BPF_SUB;
}

/* State shared between the two sanitize_ptr_alu() calls that
 * adjust_ptr_min_max_vals() makes for one instruction.
 */
struct bpf_sanitize_info {
	/* scratch aux data filled in by the first (!commit_window) call */
	struct bpf_insn_aux_data aux;
	/* masking direction chosen by the first call, reused by the second */
	bool mask_to_left;
};

/* Push a new branch state for @next_idx (coming from @curr_idx) onto the
 * verification stack via push_stack().
 * NOTE(review): the 'true' argument presumably marks the pushed state as
 * speculative - confirm against push_stack().
 *
 * If @insn is non-NULL, its dst_reg (and, for BPF_X, also its src_reg) is
 * marked unknown in the pushed state. Returns 0, or the error encoded in the
 * pointer returned by push_stack().
 */
static int sanitize_speculative_path(struct bpf_verifier_env *env,
				     const struct bpf_insn *insn,
				     u32 next_idx, u32 curr_idx)
{
	struct bpf_verifier_state *branch;
	struct bpf_reg_state *regs;

	branch = push_stack(env, next_idx, curr_idx, true);
	if (!IS_ERR(branch) && insn) {
		regs = branch->frame[branch->curframe]->regs;
		if (BPF_SRC(insn->code) == BPF_K) {
			mark_reg_unknown(env, regs, insn->dst_reg);
		} else if (BPF_SRC(insn->code) == BPF_X) {
			mark_reg_unknown(env, regs, insn->dst_reg);
			mark_reg_unknown(env, regs, insn->src_reg);
		}
	}
	return PTR_ERR_OR_ZERO(branch);
}

/* Set up masking state for a pointer +/- scalar ALU instruction.
 *
 * adjust_ptr_min_max_vals() calls this twice per instruction with the same
 * @info:
 *  - @commit_window == false, before the operation is applied: rejects
 *    non-constant offsets with mixed signed bounds (REASON_BOUNDS), picks
 *    the masking direction, computes the limit with retrieve_ptr_limit(),
 *    records alu_state/alu_limit in the scratch info->aux and, if the offset
 *    is not a known constant, pushes a path for insn_idx + 1 through
 *    sanitize_speculative_path().
 *  - @commit_window == true, after the operation, with @ptr_reg being the
 *    updated destination: the limit becomes the absolute difference between
 *    the limit recorded by the first call and the limit of the moved
 *    pointer, and is stored in the aux data of the current instruction.
 *
 * When the current state is already speculative, nothing is recorded and
 * only the path simulation (first call) is performed.
 *
 * Returns 0 on success or a negative REASON_* code.
 */
static int sanitize_ptr_alu(struct bpf_verifier_env *env,
			    struct bpf_insn *insn,
			    const struct bpf_reg_state *ptr_reg,
			    const struct bpf_reg_state *off_reg,
			    struct bpf_reg_state *dst_reg,
			    struct bpf_sanitize_info *info,
			    const bool commit_window)
{
	struct bpf_insn_aux_data *aux = commit_window ? cur_aux(env) : &info->aux;
	struct bpf_verifier_state *vstate = env->cur_state;
	bool off_is_imm = tnum_is_const(off_reg->var_off);
	bool off_is_neg = off_reg->smin_value < 0;
	bool ptr_is_dst_reg = ptr_reg == dst_reg;
	u8 opcode = BPF_OP(insn->code);
	u32 alu_state, alu_limit;
	struct bpf_reg_state tmp;
	int err;

	if (can_skip_alu_sanitation(env, insn))
		return 0;

	/* We already marked aux for masking from non-speculative
	 * paths, thus we got here in the first place. We only care
	 * to explore bad access from here.
	 */
	if (vstate->speculative)
		goto do_sim;

	if (!commit_window) {
		if (!tnum_is_const(off_reg->var_off) &&
		    (off_reg->smin_value < 0) != (off_reg->smax_value < 0))
			return REASON_BOUNDS;

		/* Adding a negative or subtracting a non-negative offset
		 * moves the pointer to the left.
		 */
		info->mask_to_left = (opcode == BPF_ADD &&  off_is_neg) ||
				     (opcode == BPF_SUB && !off_is_neg);
	}

	err = retrieve_ptr_limit(ptr_reg, &alu_limit, info->mask_to_left);
	if (err < 0)
		return err;

	if (commit_window) {
		/* In commit phase we narrow the masking window based on
		 * the observed pointer move after the simulated operation.
		 */
		alu_state = info->aux.alu_state;
		alu_limit = abs(info->aux.alu_limit - alu_limit);
	} else {
		alu_state  = off_is_neg ? BPF_ALU_NEG_VALUE : 0;
		alu_state |= off_is_imm ? BPF_ALU_IMMEDIATE : 0;
		alu_state |= ptr_is_dst_reg ?
			     BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST;

		/* Limit pruning on unknown scalars to enable deep search for
		 * potential masking differences from other program paths.
		 */
		if (!off_is_imm)
			env->explore_alu_limits = true;
	}

	err = update_alu_sanitation_state(aux, alu_state, alu_limit);
	if (err < 0)
		return err;
do_sim:
	/* If we're in commit phase, we're done here given we already
	 * pushed the truncated dst_reg into the speculative verification
	 * stack.
	 *
	 * Also, when register is a known constant, we rewrite register-based
	 * operation to immediate-based, and thus do not need masking (and as
	 * a consequence, do not need to simulate the zero-truncation either).
	 */
	if (commit_window || off_is_imm)
		return 0;

	/* Simulate and find potential out-of-bounds access under
	 * speculative execution from truncation as a result of
	 * masking when off was not within expected range. If off
	 * sits in dst, then we temporarily need to move ptr there
	 * to simulate dst (== 0) +/-= ptr. Needed, for example,
	 * for cases where we use K-based arithmetic in one direction
	 * and truncated reg-based in the other in order to explore
	 * bad access.
	 */
	if (!ptr_is_dst_reg) {
		tmp = *dst_reg;
		copy_register_state(dst_reg, ptr_reg);
	}
	err = sanitize_speculative_path(env, NULL, env->insn_idx + 1, env->insn_idx);
	if (err < 0)
		return REASON_STACK;
	/* Undo the temporary pointer copy now that the path has been pushed. */
	if (!ptr_is_dst_reg)
		*dst_reg = tmp;
	return 0;
}

/* Stamp the current instruction as seen in this pass (env->pass_cnt), but
 * only when the current state is not speculative.
 */
static void sanitize_mark_insn_seen(struct bpf_verifier_env *env)
{
	struct bpf_verifier_state *vstate = env->cur_state;

	/* If we simulate paths under speculation, we don't update the
	 * insn as 'seen' such that when we verify unreachable paths in
	 * the non-speculative domain, sanitize_dead_code() can still
	 * rewrite/sanitize them.
	 */
	if (!vstate->speculative)
		env->insn_aux_data[env->insn_idx].seen = env->pass_cnt;
}

/* Log the message that belongs to a REASON_* code and convert it into an
 * errno: -ENOMEM for REASON_STACK, -EACCES for everything else. An unknown
 * @reason additionally triggers verifier_bug().
 *
 * @off_reg and @dst_reg are only compared by address to decide whether the
 * offending register is the instruction's dst or src register.
 */
static int sanitize_err(struct bpf_verifier_env *env,
			const struct bpf_insn *insn, int reason,
			const struct bpf_reg_state *off_reg,
			const struct bpf_reg_state *dst_reg)
{
	static const char *err = "pointer arithmetic with it prohibited for !root";
	const char *op = BPF_OP(insn->code) == BPF_ADD ? "add" : "sub";
	u32 dst = insn->dst_reg, src = insn->src_reg;

	switch (reason) {
	case REASON_BOUNDS:
		/* names the register that holds the scalar offset */
		verbose(env, "R%d has unknown scalar with mixed signed bounds, %s\n",
			off_reg == dst_reg ? dst : src, err);
		break;
	case REASON_TYPE:
		/* names the register that holds the pointer */
		verbose(env, "R%d has pointer with unsupported alu operation, %s\n",
			off_reg == dst_reg ? src : dst, err);
		break;
	case REASON_PATHS:
		verbose(env, "R%d tried to %s from different maps, paths or scalars, %s\n",
			dst, op, err);
		break;
	case REASON_LIMIT:
		verbose(env, "R%d tried to %s beyond pointer bounds, %s\n",
			dst, op, err);
		break;
	case REASON_STACK:
		verbose(env, "R%d could not be pushed for speculative verification, %s\n",
			dst, err);
		return -ENOMEM;
	default:
		verifier_bug(env, "unknown reason (%d)", reason);
		break;
	}

	return -EACCES;
}

/* check that stack access falls within stack limits and that 'reg' doesn't
 * have a variable offset.
 *
 * Variable offset is prohibited for unprivileged mode for simplicity since it
 * requires corresponding support in Spectre masking for stack ALU. See also
 * retrieve_ptr_limit().
 *
 *
 * 'off' includes 'reg->off'.
14779 */ 14780 static int check_stack_access_for_ptr_arithmetic( 14781 struct bpf_verifier_env *env, 14782 int regno, 14783 const struct bpf_reg_state *reg, 14784 int off) 14785 { 14786 if (!tnum_is_const(reg->var_off)) { 14787 char tn_buf[48]; 14788 14789 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); 14790 verbose(env, "R%d variable stack access prohibited for !root, var_off=%s off=%d\n", 14791 regno, tn_buf, off); 14792 return -EACCES; 14793 } 14794 14795 if (off >= 0 || off < -MAX_BPF_STACK) { 14796 verbose(env, "R%d stack pointer arithmetic goes out of range, " 14797 "prohibited for !root; off=%d\n", regno, off); 14798 return -EACCES; 14799 } 14800 14801 return 0; 14802 } 14803 14804 static int sanitize_check_bounds(struct bpf_verifier_env *env, 14805 const struct bpf_insn *insn, 14806 const struct bpf_reg_state *dst_reg) 14807 { 14808 u32 dst = insn->dst_reg; 14809 14810 /* For unprivileged we require that resulting offset must be in bounds 14811 * in order to be able to sanitize access later on. 14812 */ 14813 if (env->bypass_spec_v1) 14814 return 0; 14815 14816 switch (dst_reg->type) { 14817 case PTR_TO_STACK: 14818 if (check_stack_access_for_ptr_arithmetic(env, dst, dst_reg, 14819 dst_reg->off + dst_reg->var_off.value)) 14820 return -EACCES; 14821 break; 14822 case PTR_TO_MAP_VALUE: 14823 if (check_map_access(env, dst, dst_reg->off, 1, false, ACCESS_HELPER)) { 14824 verbose(env, "R%d pointer arithmetic of map value goes out of range, " 14825 "prohibited for !root\n", dst); 14826 return -EACCES; 14827 } 14828 break; 14829 default: 14830 return -EOPNOTSUPP; 14831 } 14832 14833 return 0; 14834 } 14835 14836 /* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off. 14837 * Caller should also handle BPF_MOV case separately. 14838 * If we return -EACCES, caller may want to try again treating pointer as a 14839 * scalar. So we only emit a diagnostic if !env->allow_ptr_leaks. 
14840 */ 14841 static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, 14842 struct bpf_insn *insn, 14843 const struct bpf_reg_state *ptr_reg, 14844 const struct bpf_reg_state *off_reg) 14845 { 14846 struct bpf_verifier_state *vstate = env->cur_state; 14847 struct bpf_func_state *state = vstate->frame[vstate->curframe]; 14848 struct bpf_reg_state *regs = state->regs, *dst_reg; 14849 bool known = tnum_is_const(off_reg->var_off); 14850 s64 smin_val = off_reg->smin_value, smax_val = off_reg->smax_value, 14851 smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value; 14852 u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value, 14853 umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value; 14854 struct bpf_sanitize_info info = {}; 14855 u8 opcode = BPF_OP(insn->code); 14856 u32 dst = insn->dst_reg; 14857 int ret, bounds_ret; 14858 14859 dst_reg = ®s[dst]; 14860 14861 if ((known && (smin_val != smax_val || umin_val != umax_val)) || 14862 smin_val > smax_val || umin_val > umax_val) { 14863 /* Taint dst register if offset had invalid bounds derived from 14864 * e.g. dead branches. 14865 */ 14866 __mark_reg_unknown(env, dst_reg); 14867 return 0; 14868 } 14869 14870 if (BPF_CLASS(insn->code) != BPF_ALU64) { 14871 /* 32-bit ALU ops on pointers produce (meaningless) scalars */ 14872 if (opcode == BPF_SUB && env->allow_ptr_leaks) { 14873 __mark_reg_unknown(env, dst_reg); 14874 return 0; 14875 } 14876 14877 verbose(env, 14878 "R%d 32-bit pointer arithmetic prohibited\n", 14879 dst); 14880 return -EACCES; 14881 } 14882 14883 if (ptr_reg->type & PTR_MAYBE_NULL) { 14884 verbose(env, "R%d pointer arithmetic on %s prohibited, null-check it first\n", 14885 dst, reg_type_str(env, ptr_reg->type)); 14886 return -EACCES; 14887 } 14888 14889 /* 14890 * Accesses to untrusted PTR_TO_MEM are done through probe 14891 * instructions, hence no need to track offsets. 
14892 */ 14893 if (base_type(ptr_reg->type) == PTR_TO_MEM && (ptr_reg->type & PTR_UNTRUSTED)) 14894 return 0; 14895 14896 switch (base_type(ptr_reg->type)) { 14897 case PTR_TO_CTX: 14898 case PTR_TO_MAP_VALUE: 14899 case PTR_TO_MAP_KEY: 14900 case PTR_TO_STACK: 14901 case PTR_TO_PACKET_META: 14902 case PTR_TO_PACKET: 14903 case PTR_TO_TP_BUFFER: 14904 case PTR_TO_BTF_ID: 14905 case PTR_TO_MEM: 14906 case PTR_TO_BUF: 14907 case PTR_TO_FUNC: 14908 case CONST_PTR_TO_DYNPTR: 14909 break; 14910 case PTR_TO_FLOW_KEYS: 14911 if (known) 14912 break; 14913 fallthrough; 14914 case CONST_PTR_TO_MAP: 14915 /* smin_val represents the known value */ 14916 if (known && smin_val == 0 && opcode == BPF_ADD) 14917 break; 14918 fallthrough; 14919 default: 14920 verbose(env, "R%d pointer arithmetic on %s prohibited\n", 14921 dst, reg_type_str(env, ptr_reg->type)); 14922 return -EACCES; 14923 } 14924 14925 /* In case of 'scalar += pointer', dst_reg inherits pointer type and id. 14926 * The id may be overwritten later if we create a new variable offset. 14927 */ 14928 dst_reg->type = ptr_reg->type; 14929 dst_reg->id = ptr_reg->id; 14930 14931 if (!check_reg_sane_offset(env, off_reg, ptr_reg->type) || 14932 !check_reg_sane_offset(env, ptr_reg, ptr_reg->type)) 14933 return -EINVAL; 14934 14935 /* pointer types do not carry 32-bit bounds at the moment. */ 14936 __mark_reg32_unbounded(dst_reg); 14937 14938 if (sanitize_needed(opcode)) { 14939 ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg, 14940 &info, false); 14941 if (ret < 0) 14942 return sanitize_err(env, insn, ret, off_reg, dst_reg); 14943 } 14944 14945 switch (opcode) { 14946 case BPF_ADD: 14947 /* We can take a fixed offset as long as it doesn't overflow 14948 * the s32 'off' field 14949 */ 14950 if (known && (ptr_reg->off + smin_val == 14951 (s64)(s32)(ptr_reg->off + smin_val))) { 14952 /* pointer += K. 
Accumulate it into fixed offset */ 14953 dst_reg->smin_value = smin_ptr; 14954 dst_reg->smax_value = smax_ptr; 14955 dst_reg->umin_value = umin_ptr; 14956 dst_reg->umax_value = umax_ptr; 14957 dst_reg->var_off = ptr_reg->var_off; 14958 dst_reg->off = ptr_reg->off + smin_val; 14959 dst_reg->raw = ptr_reg->raw; 14960 break; 14961 } 14962 /* A new variable offset is created. Note that off_reg->off 14963 * == 0, since it's a scalar. 14964 * dst_reg gets the pointer type and since some positive 14965 * integer value was added to the pointer, give it a new 'id' 14966 * if it's a PTR_TO_PACKET. 14967 * this creates a new 'base' pointer, off_reg (variable) gets 14968 * added into the variable offset, and we copy the fixed offset 14969 * from ptr_reg. 14970 */ 14971 if (check_add_overflow(smin_ptr, smin_val, &dst_reg->smin_value) || 14972 check_add_overflow(smax_ptr, smax_val, &dst_reg->smax_value)) { 14973 dst_reg->smin_value = S64_MIN; 14974 dst_reg->smax_value = S64_MAX; 14975 } 14976 if (check_add_overflow(umin_ptr, umin_val, &dst_reg->umin_value) || 14977 check_add_overflow(umax_ptr, umax_val, &dst_reg->umax_value)) { 14978 dst_reg->umin_value = 0; 14979 dst_reg->umax_value = U64_MAX; 14980 } 14981 dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off); 14982 dst_reg->off = ptr_reg->off; 14983 dst_reg->raw = ptr_reg->raw; 14984 if (reg_is_pkt_pointer(ptr_reg)) { 14985 dst_reg->id = ++env->id_gen; 14986 /* something was added to pkt_ptr, set range to zero */ 14987 memset(&dst_reg->raw, 0, sizeof(dst_reg->raw)); 14988 } 14989 break; 14990 case BPF_SUB: 14991 if (dst_reg == off_reg) { 14992 /* scalar -= pointer. Creates an unknown scalar */ 14993 verbose(env, "R%d tried to subtract pointer from scalar\n", 14994 dst); 14995 return -EACCES; 14996 } 14997 /* We don't allow subtraction from FP, because (according to 14998 * test_verifier.c test "invalid fp arithmetic", JITs might not 14999 * be able to deal with it. 
15000 */ 15001 if (ptr_reg->type == PTR_TO_STACK) { 15002 verbose(env, "R%d subtraction from stack pointer prohibited\n", 15003 dst); 15004 return -EACCES; 15005 } 15006 if (known && (ptr_reg->off - smin_val == 15007 (s64)(s32)(ptr_reg->off - smin_val))) { 15008 /* pointer -= K. Subtract it from fixed offset */ 15009 dst_reg->smin_value = smin_ptr; 15010 dst_reg->smax_value = smax_ptr; 15011 dst_reg->umin_value = umin_ptr; 15012 dst_reg->umax_value = umax_ptr; 15013 dst_reg->var_off = ptr_reg->var_off; 15014 dst_reg->id = ptr_reg->id; 15015 dst_reg->off = ptr_reg->off - smin_val; 15016 dst_reg->raw = ptr_reg->raw; 15017 break; 15018 } 15019 /* A new variable offset is created. If the subtrahend is known 15020 * nonnegative, then any reg->range we had before is still good. 15021 */ 15022 if (check_sub_overflow(smin_ptr, smax_val, &dst_reg->smin_value) || 15023 check_sub_overflow(smax_ptr, smin_val, &dst_reg->smax_value)) { 15024 /* Overflow possible, we know nothing */ 15025 dst_reg->smin_value = S64_MIN; 15026 dst_reg->smax_value = S64_MAX; 15027 } 15028 if (umin_ptr < umax_val) { 15029 /* Overflow possible, we know nothing */ 15030 dst_reg->umin_value = 0; 15031 dst_reg->umax_value = U64_MAX; 15032 } else { 15033 /* Cannot overflow (as long as bounds are consistent) */ 15034 dst_reg->umin_value = umin_ptr - umax_val; 15035 dst_reg->umax_value = umax_ptr - umin_val; 15036 } 15037 dst_reg->var_off = tnum_sub(ptr_reg->var_off, off_reg->var_off); 15038 dst_reg->off = ptr_reg->off; 15039 dst_reg->raw = ptr_reg->raw; 15040 if (reg_is_pkt_pointer(ptr_reg)) { 15041 dst_reg->id = ++env->id_gen; 15042 /* something was added to pkt_ptr, set range to zero */ 15043 if (smin_val < 0) 15044 memset(&dst_reg->raw, 0, sizeof(dst_reg->raw)); 15045 } 15046 break; 15047 case BPF_AND: 15048 case BPF_OR: 15049 case BPF_XOR: 15050 /* bitwise ops on pointers are troublesome, prohibit. 
*/ 15051 verbose(env, "R%d bitwise operator %s on pointer prohibited\n", 15052 dst, bpf_alu_string[opcode >> 4]); 15053 return -EACCES; 15054 default: 15055 /* other operators (e.g. MUL,LSH) produce non-pointer results */ 15056 verbose(env, "R%d pointer arithmetic with %s operator prohibited\n", 15057 dst, bpf_alu_string[opcode >> 4]); 15058 return -EACCES; 15059 } 15060 15061 if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type)) 15062 return -EINVAL; 15063 reg_bounds_sync(dst_reg); 15064 bounds_ret = sanitize_check_bounds(env, insn, dst_reg); 15065 if (bounds_ret == -EACCES) 15066 return bounds_ret; 15067 if (sanitize_needed(opcode)) { 15068 ret = sanitize_ptr_alu(env, insn, dst_reg, off_reg, dst_reg, 15069 &info, true); 15070 if (verifier_bug_if(!can_skip_alu_sanitation(env, insn) 15071 && !env->cur_state->speculative 15072 && bounds_ret 15073 && !ret, 15074 env, "Pointer type unsupported by sanitize_check_bounds() not rejected by retrieve_ptr_limit() as required")) { 15075 return -EFAULT; 15076 } 15077 if (ret < 0) 15078 return sanitize_err(env, insn, ret, off_reg, dst_reg); 15079 } 15080 15081 return 0; 15082 } 15083 15084 static void scalar32_min_max_add(struct bpf_reg_state *dst_reg, 15085 struct bpf_reg_state *src_reg) 15086 { 15087 s32 *dst_smin = &dst_reg->s32_min_value; 15088 s32 *dst_smax = &dst_reg->s32_max_value; 15089 u32 *dst_umin = &dst_reg->u32_min_value; 15090 u32 *dst_umax = &dst_reg->u32_max_value; 15091 u32 umin_val = src_reg->u32_min_value; 15092 u32 umax_val = src_reg->u32_max_value; 15093 bool min_overflow, max_overflow; 15094 15095 if (check_add_overflow(*dst_smin, src_reg->s32_min_value, dst_smin) || 15096 check_add_overflow(*dst_smax, src_reg->s32_max_value, dst_smax)) { 15097 *dst_smin = S32_MIN; 15098 *dst_smax = S32_MAX; 15099 } 15100 15101 /* If either all additions overflow or no additions overflow, then 15102 * it is okay to set: dst_umin = dst_umin + src_umin, dst_umax = 15103 * dst_umax + src_umax. 
Otherwise (some additions overflow), set 15104 * the output bounds to unbounded. 15105 */ 15106 min_overflow = check_add_overflow(*dst_umin, umin_val, dst_umin); 15107 max_overflow = check_add_overflow(*dst_umax, umax_val, dst_umax); 15108 15109 if (!min_overflow && max_overflow) { 15110 *dst_umin = 0; 15111 *dst_umax = U32_MAX; 15112 } 15113 } 15114 15115 static void scalar_min_max_add(struct bpf_reg_state *dst_reg, 15116 struct bpf_reg_state *src_reg) 15117 { 15118 s64 *dst_smin = &dst_reg->smin_value; 15119 s64 *dst_smax = &dst_reg->smax_value; 15120 u64 *dst_umin = &dst_reg->umin_value; 15121 u64 *dst_umax = &dst_reg->umax_value; 15122 u64 umin_val = src_reg->umin_value; 15123 u64 umax_val = src_reg->umax_value; 15124 bool min_overflow, max_overflow; 15125 15126 if (check_add_overflow(*dst_smin, src_reg->smin_value, dst_smin) || 15127 check_add_overflow(*dst_smax, src_reg->smax_value, dst_smax)) { 15128 *dst_smin = S64_MIN; 15129 *dst_smax = S64_MAX; 15130 } 15131 15132 /* If either all additions overflow or no additions overflow, then 15133 * it is okay to set: dst_umin = dst_umin + src_umin, dst_umax = 15134 * dst_umax + src_umax. Otherwise (some additions overflow), set 15135 * the output bounds to unbounded. 
15136 */ 15137 min_overflow = check_add_overflow(*dst_umin, umin_val, dst_umin); 15138 max_overflow = check_add_overflow(*dst_umax, umax_val, dst_umax); 15139 15140 if (!min_overflow && max_overflow) { 15141 *dst_umin = 0; 15142 *dst_umax = U64_MAX; 15143 } 15144 } 15145 15146 static void scalar32_min_max_sub(struct bpf_reg_state *dst_reg, 15147 struct bpf_reg_state *src_reg) 15148 { 15149 s32 *dst_smin = &dst_reg->s32_min_value; 15150 s32 *dst_smax = &dst_reg->s32_max_value; 15151 u32 *dst_umin = &dst_reg->u32_min_value; 15152 u32 *dst_umax = &dst_reg->u32_max_value; 15153 u32 umin_val = src_reg->u32_min_value; 15154 u32 umax_val = src_reg->u32_max_value; 15155 bool min_underflow, max_underflow; 15156 15157 if (check_sub_overflow(*dst_smin, src_reg->s32_max_value, dst_smin) || 15158 check_sub_overflow(*dst_smax, src_reg->s32_min_value, dst_smax)) { 15159 /* Overflow possible, we know nothing */ 15160 *dst_smin = S32_MIN; 15161 *dst_smax = S32_MAX; 15162 } 15163 15164 /* If either all subtractions underflow or no subtractions 15165 * underflow, it is okay to set: dst_umin = dst_umin - src_umax, 15166 * dst_umax = dst_umax - src_umin. Otherwise (some subtractions 15167 * underflow), set the output bounds to unbounded. 
15168 */ 15169 min_underflow = check_sub_overflow(*dst_umin, umax_val, dst_umin); 15170 max_underflow = check_sub_overflow(*dst_umax, umin_val, dst_umax); 15171 15172 if (min_underflow && !max_underflow) { 15173 *dst_umin = 0; 15174 *dst_umax = U32_MAX; 15175 } 15176 } 15177 15178 static void scalar_min_max_sub(struct bpf_reg_state *dst_reg, 15179 struct bpf_reg_state *src_reg) 15180 { 15181 s64 *dst_smin = &dst_reg->smin_value; 15182 s64 *dst_smax = &dst_reg->smax_value; 15183 u64 *dst_umin = &dst_reg->umin_value; 15184 u64 *dst_umax = &dst_reg->umax_value; 15185 u64 umin_val = src_reg->umin_value; 15186 u64 umax_val = src_reg->umax_value; 15187 bool min_underflow, max_underflow; 15188 15189 if (check_sub_overflow(*dst_smin, src_reg->smax_value, dst_smin) || 15190 check_sub_overflow(*dst_smax, src_reg->smin_value, dst_smax)) { 15191 /* Overflow possible, we know nothing */ 15192 *dst_smin = S64_MIN; 15193 *dst_smax = S64_MAX; 15194 } 15195 15196 /* If either all subtractions underflow or no subtractions 15197 * underflow, it is okay to set: dst_umin = dst_umin - src_umax, 15198 * dst_umax = dst_umax - src_umin. Otherwise (some subtractions 15199 * underflow), set the output bounds to unbounded. 
15200 */ 15201 min_underflow = check_sub_overflow(*dst_umin, umax_val, dst_umin); 15202 max_underflow = check_sub_overflow(*dst_umax, umin_val, dst_umax); 15203 15204 if (min_underflow && !max_underflow) { 15205 *dst_umin = 0; 15206 *dst_umax = U64_MAX; 15207 } 15208 } 15209 15210 static void scalar32_min_max_mul(struct bpf_reg_state *dst_reg, 15211 struct bpf_reg_state *src_reg) 15212 { 15213 s32 *dst_smin = &dst_reg->s32_min_value; 15214 s32 *dst_smax = &dst_reg->s32_max_value; 15215 u32 *dst_umin = &dst_reg->u32_min_value; 15216 u32 *dst_umax = &dst_reg->u32_max_value; 15217 s32 tmp_prod[4]; 15218 15219 if (check_mul_overflow(*dst_umax, src_reg->u32_max_value, dst_umax) || 15220 check_mul_overflow(*dst_umin, src_reg->u32_min_value, dst_umin)) { 15221 /* Overflow possible, we know nothing */ 15222 *dst_umin = 0; 15223 *dst_umax = U32_MAX; 15224 } 15225 if (check_mul_overflow(*dst_smin, src_reg->s32_min_value, &tmp_prod[0]) || 15226 check_mul_overflow(*dst_smin, src_reg->s32_max_value, &tmp_prod[1]) || 15227 check_mul_overflow(*dst_smax, src_reg->s32_min_value, &tmp_prod[2]) || 15228 check_mul_overflow(*dst_smax, src_reg->s32_max_value, &tmp_prod[3])) { 15229 /* Overflow possible, we know nothing */ 15230 *dst_smin = S32_MIN; 15231 *dst_smax = S32_MAX; 15232 } else { 15233 *dst_smin = min_array(tmp_prod, 4); 15234 *dst_smax = max_array(tmp_prod, 4); 15235 } 15236 } 15237 15238 static void scalar_min_max_mul(struct bpf_reg_state *dst_reg, 15239 struct bpf_reg_state *src_reg) 15240 { 15241 s64 *dst_smin = &dst_reg->smin_value; 15242 s64 *dst_smax = &dst_reg->smax_value; 15243 u64 *dst_umin = &dst_reg->umin_value; 15244 u64 *dst_umax = &dst_reg->umax_value; 15245 s64 tmp_prod[4]; 15246 15247 if (check_mul_overflow(*dst_umax, src_reg->umax_value, dst_umax) || 15248 check_mul_overflow(*dst_umin, src_reg->umin_value, dst_umin)) { 15249 /* Overflow possible, we know nothing */ 15250 *dst_umin = 0; 15251 *dst_umax = U64_MAX; 15252 } 15253 if (check_mul_overflow(*dst_smin, 
src_reg->smin_value, &tmp_prod[0]) || 15254 check_mul_overflow(*dst_smin, src_reg->smax_value, &tmp_prod[1]) || 15255 check_mul_overflow(*dst_smax, src_reg->smin_value, &tmp_prod[2]) || 15256 check_mul_overflow(*dst_smax, src_reg->smax_value, &tmp_prod[3])) { 15257 /* Overflow possible, we know nothing */ 15258 *dst_smin = S64_MIN; 15259 *dst_smax = S64_MAX; 15260 } else { 15261 *dst_smin = min_array(tmp_prod, 4); 15262 *dst_smax = max_array(tmp_prod, 4); 15263 } 15264 } 15265 15266 static void scalar32_min_max_udiv(struct bpf_reg_state *dst_reg, 15267 struct bpf_reg_state *src_reg) 15268 { 15269 u32 *dst_umin = &dst_reg->u32_min_value; 15270 u32 *dst_umax = &dst_reg->u32_max_value; 15271 u32 src_val = src_reg->u32_min_value; /* non-zero, const divisor */ 15272 15273 *dst_umin = *dst_umin / src_val; 15274 *dst_umax = *dst_umax / src_val; 15275 15276 /* Reset other ranges/tnum to unbounded/unknown. */ 15277 dst_reg->s32_min_value = S32_MIN; 15278 dst_reg->s32_max_value = S32_MAX; 15279 reset_reg64_and_tnum(dst_reg); 15280 } 15281 15282 static void scalar_min_max_udiv(struct bpf_reg_state *dst_reg, 15283 struct bpf_reg_state *src_reg) 15284 { 15285 u64 *dst_umin = &dst_reg->umin_value; 15286 u64 *dst_umax = &dst_reg->umax_value; 15287 u64 src_val = src_reg->umin_value; /* non-zero, const divisor */ 15288 15289 *dst_umin = div64_u64(*dst_umin, src_val); 15290 *dst_umax = div64_u64(*dst_umax, src_val); 15291 15292 /* Reset other ranges/tnum to unbounded/unknown. 
*/ 15293 dst_reg->smin_value = S64_MIN; 15294 dst_reg->smax_value = S64_MAX; 15295 reset_reg32_and_tnum(dst_reg); 15296 } 15297 15298 static void scalar32_min_max_sdiv(struct bpf_reg_state *dst_reg, 15299 struct bpf_reg_state *src_reg) 15300 { 15301 s32 *dst_smin = &dst_reg->s32_min_value; 15302 s32 *dst_smax = &dst_reg->s32_max_value; 15303 s32 src_val = src_reg->s32_min_value; /* non-zero, const divisor */ 15304 s32 res1, res2; 15305 15306 /* BPF div specification: S32_MIN / -1 = S32_MIN */ 15307 if (*dst_smin == S32_MIN && src_val == -1) { 15308 /* 15309 * If the dividend range contains more than just S32_MIN, 15310 * we cannot precisely track the result, so it becomes unbounded. 15311 * e.g., [S32_MIN, S32_MIN+10]/(-1), 15312 * = {S32_MIN} U [-(S32_MIN+10), -(S32_MIN+1)] 15313 * = {S32_MIN} U [S32_MAX-9, S32_MAX] = [S32_MIN, S32_MAX] 15314 * Otherwise (if dividend is exactly S32_MIN), result remains S32_MIN. 15315 */ 15316 if (*dst_smax != S32_MIN) { 15317 *dst_smin = S32_MIN; 15318 *dst_smax = S32_MAX; 15319 } 15320 goto reset; 15321 } 15322 15323 res1 = *dst_smin / src_val; 15324 res2 = *dst_smax / src_val; 15325 *dst_smin = min(res1, res2); 15326 *dst_smax = max(res1, res2); 15327 15328 reset: 15329 /* Reset other ranges/tnum to unbounded/unknown. */ 15330 dst_reg->u32_min_value = 0; 15331 dst_reg->u32_max_value = U32_MAX; 15332 reset_reg64_and_tnum(dst_reg); 15333 } 15334 15335 static void scalar_min_max_sdiv(struct bpf_reg_state *dst_reg, 15336 struct bpf_reg_state *src_reg) 15337 { 15338 s64 *dst_smin = &dst_reg->smin_value; 15339 s64 *dst_smax = &dst_reg->smax_value; 15340 s64 src_val = src_reg->smin_value; /* non-zero, const divisor */ 15341 s64 res1, res2; 15342 15343 /* BPF div specification: S64_MIN / -1 = S64_MIN */ 15344 if (*dst_smin == S64_MIN && src_val == -1) { 15345 /* 15346 * If the dividend range contains more than just S64_MIN, 15347 * we cannot precisely track the result, so it becomes unbounded. 
15348 * e.g., [S64_MIN, S64_MIN+10]/(-1), 15349 * = {S64_MIN} U [-(S64_MIN+10), -(S64_MIN+1)] 15350 * = {S64_MIN} U [S64_MAX-9, S64_MAX] = [S64_MIN, S64_MAX] 15351 * Otherwise (if dividend is exactly S64_MIN), result remains S64_MIN. 15352 */ 15353 if (*dst_smax != S64_MIN) { 15354 *dst_smin = S64_MIN; 15355 *dst_smax = S64_MAX; 15356 } 15357 goto reset; 15358 } 15359 15360 res1 = div64_s64(*dst_smin, src_val); 15361 res2 = div64_s64(*dst_smax, src_val); 15362 *dst_smin = min(res1, res2); 15363 *dst_smax = max(res1, res2); 15364 15365 reset: 15366 /* Reset other ranges/tnum to unbounded/unknown. */ 15367 dst_reg->umin_value = 0; 15368 dst_reg->umax_value = U64_MAX; 15369 reset_reg32_and_tnum(dst_reg); 15370 } 15371 15372 static void scalar32_min_max_umod(struct bpf_reg_state *dst_reg, 15373 struct bpf_reg_state *src_reg) 15374 { 15375 u32 *dst_umin = &dst_reg->u32_min_value; 15376 u32 *dst_umax = &dst_reg->u32_max_value; 15377 u32 src_val = src_reg->u32_min_value; /* non-zero, const divisor */ 15378 u32 res_max = src_val - 1; 15379 15380 /* 15381 * If dst_umax <= res_max, the result remains unchanged. 15382 * e.g., [2, 5] % 10 = [2, 5]. 15383 */ 15384 if (*dst_umax <= res_max) 15385 return; 15386 15387 *dst_umin = 0; 15388 *dst_umax = min(*dst_umax, res_max); 15389 15390 /* Reset other ranges/tnum to unbounded/unknown. */ 15391 dst_reg->s32_min_value = S32_MIN; 15392 dst_reg->s32_max_value = S32_MAX; 15393 reset_reg64_and_tnum(dst_reg); 15394 } 15395 15396 static void scalar_min_max_umod(struct bpf_reg_state *dst_reg, 15397 struct bpf_reg_state *src_reg) 15398 { 15399 u64 *dst_umin = &dst_reg->umin_value; 15400 u64 *dst_umax = &dst_reg->umax_value; 15401 u64 src_val = src_reg->umin_value; /* non-zero, const divisor */ 15402 u64 res_max = src_val - 1; 15403 15404 /* 15405 * If dst_umax <= res_max, the result remains unchanged. 15406 * e.g., [2, 5] % 10 = [2, 5]. 
15407 */ 15408 if (*dst_umax <= res_max) 15409 return; 15410 15411 *dst_umin = 0; 15412 *dst_umax = min(*dst_umax, res_max); 15413 15414 /* Reset other ranges/tnum to unbounded/unknown. */ 15415 dst_reg->smin_value = S64_MIN; 15416 dst_reg->smax_value = S64_MAX; 15417 reset_reg32_and_tnum(dst_reg); 15418 } 15419 15420 static void scalar32_min_max_smod(struct bpf_reg_state *dst_reg, 15421 struct bpf_reg_state *src_reg) 15422 { 15423 s32 *dst_smin = &dst_reg->s32_min_value; 15424 s32 *dst_smax = &dst_reg->s32_max_value; 15425 s32 src_val = src_reg->s32_min_value; /* non-zero, const divisor */ 15426 15427 /* 15428 * Safe absolute value calculation: 15429 * If src_val == S32_MIN (-2147483648), src_abs becomes 2147483648. 15430 * Here use unsigned integer to avoid overflow. 15431 */ 15432 u32 src_abs = (src_val > 0) ? (u32)src_val : -(u32)src_val; 15433 15434 /* 15435 * Calculate the maximum possible absolute value of the result. 15436 * Even if src_abs is 2147483648 (S32_MIN), subtracting 1 gives 15437 * 2147483647 (S32_MAX), which fits perfectly in s32. 15438 */ 15439 s32 res_max_abs = src_abs - 1; 15440 15441 /* 15442 * If the dividend is already within the result range, 15443 * the result remains unchanged. e.g., [-2, 5] % 10 = [-2, 5]. 15444 */ 15445 if (*dst_smin >= -res_max_abs && *dst_smax <= res_max_abs) 15446 return; 15447 15448 /* General case: result has the same sign as the dividend. */ 15449 if (*dst_smin >= 0) { 15450 *dst_smin = 0; 15451 *dst_smax = min(*dst_smax, res_max_abs); 15452 } else if (*dst_smax <= 0) { 15453 *dst_smax = 0; 15454 *dst_smin = max(*dst_smin, -res_max_abs); 15455 } else { 15456 *dst_smin = -res_max_abs; 15457 *dst_smax = res_max_abs; 15458 } 15459 15460 /* Reset other ranges/tnum to unbounded/unknown. 
*/ 15461 dst_reg->u32_min_value = 0; 15462 dst_reg->u32_max_value = U32_MAX; 15463 reset_reg64_and_tnum(dst_reg); 15464 } 15465 15466 static void scalar_min_max_smod(struct bpf_reg_state *dst_reg, 15467 struct bpf_reg_state *src_reg) 15468 { 15469 s64 *dst_smin = &dst_reg->smin_value; 15470 s64 *dst_smax = &dst_reg->smax_value; 15471 s64 src_val = src_reg->smin_value; /* non-zero, const divisor */ 15472 15473 /* 15474 * Safe absolute value calculation: 15475 * If src_val == S64_MIN (-2^63), src_abs becomes 2^63. 15476 * Here use unsigned integer to avoid overflow. 15477 */ 15478 u64 src_abs = (src_val > 0) ? (u64)src_val : -(u64)src_val; 15479 15480 /* 15481 * Calculate the maximum possible absolute value of the result. 15482 * Even if src_abs is 2^63 (S64_MIN), subtracting 1 gives 15483 * 2^63 - 1 (S64_MAX), which fits perfectly in s64. 15484 */ 15485 s64 res_max_abs = src_abs - 1; 15486 15487 /* 15488 * If the dividend is already within the result range, 15489 * the result remains unchanged. e.g., [-2, 5] % 10 = [-2, 5]. 15490 */ 15491 if (*dst_smin >= -res_max_abs && *dst_smax <= res_max_abs) 15492 return; 15493 15494 /* General case: result has the same sign as the dividend. */ 15495 if (*dst_smin >= 0) { 15496 *dst_smin = 0; 15497 *dst_smax = min(*dst_smax, res_max_abs); 15498 } else if (*dst_smax <= 0) { 15499 *dst_smax = 0; 15500 *dst_smin = max(*dst_smin, -res_max_abs); 15501 } else { 15502 *dst_smin = -res_max_abs; 15503 *dst_smax = res_max_abs; 15504 } 15505 15506 /* Reset other ranges/tnum to unbounded/unknown. 
*/ 15507 dst_reg->umin_value = 0; 15508 dst_reg->umax_value = U64_MAX; 15509 reset_reg32_and_tnum(dst_reg); 15510 } 15511 15512 static void scalar32_min_max_and(struct bpf_reg_state *dst_reg, 15513 struct bpf_reg_state *src_reg) 15514 { 15515 bool src_known = tnum_subreg_is_const(src_reg->var_off); 15516 bool dst_known = tnum_subreg_is_const(dst_reg->var_off); 15517 struct tnum var32_off = tnum_subreg(dst_reg->var_off); 15518 u32 umax_val = src_reg->u32_max_value; 15519 15520 if (src_known && dst_known) { 15521 __mark_reg32_known(dst_reg, var32_off.value); 15522 return; 15523 } 15524 15525 /* We get our minimum from the var_off, since that's inherently 15526 * bitwise. Our maximum is the minimum of the operands' maxima. 15527 */ 15528 dst_reg->u32_min_value = var32_off.value; 15529 dst_reg->u32_max_value = min(dst_reg->u32_max_value, umax_val); 15530 15531 /* Safe to set s32 bounds by casting u32 result into s32 when u32 15532 * doesn't cross sign boundary. Otherwise set s32 bounds to unbounded. 15533 */ 15534 if ((s32)dst_reg->u32_min_value <= (s32)dst_reg->u32_max_value) { 15535 dst_reg->s32_min_value = dst_reg->u32_min_value; 15536 dst_reg->s32_max_value = dst_reg->u32_max_value; 15537 } else { 15538 dst_reg->s32_min_value = S32_MIN; 15539 dst_reg->s32_max_value = S32_MAX; 15540 } 15541 } 15542 15543 static void scalar_min_max_and(struct bpf_reg_state *dst_reg, 15544 struct bpf_reg_state *src_reg) 15545 { 15546 bool src_known = tnum_is_const(src_reg->var_off); 15547 bool dst_known = tnum_is_const(dst_reg->var_off); 15548 u64 umax_val = src_reg->umax_value; 15549 15550 if (src_known && dst_known) { 15551 __mark_reg_known(dst_reg, dst_reg->var_off.value); 15552 return; 15553 } 15554 15555 /* We get our minimum from the var_off, since that's inherently 15556 * bitwise. Our maximum is the minimum of the operands' maxima. 
15557 */ 15558 dst_reg->umin_value = dst_reg->var_off.value; 15559 dst_reg->umax_value = min(dst_reg->umax_value, umax_val); 15560 15561 /* Safe to set s64 bounds by casting u64 result into s64 when u64 15562 * doesn't cross sign boundary. Otherwise set s64 bounds to unbounded. 15563 */ 15564 if ((s64)dst_reg->umin_value <= (s64)dst_reg->umax_value) { 15565 dst_reg->smin_value = dst_reg->umin_value; 15566 dst_reg->smax_value = dst_reg->umax_value; 15567 } else { 15568 dst_reg->smin_value = S64_MIN; 15569 dst_reg->smax_value = S64_MAX; 15570 } 15571 /* We may learn something more from the var_off */ 15572 __update_reg_bounds(dst_reg); 15573 } 15574 15575 static void scalar32_min_max_or(struct bpf_reg_state *dst_reg, 15576 struct bpf_reg_state *src_reg) 15577 { 15578 bool src_known = tnum_subreg_is_const(src_reg->var_off); 15579 bool dst_known = tnum_subreg_is_const(dst_reg->var_off); 15580 struct tnum var32_off = tnum_subreg(dst_reg->var_off); 15581 u32 umin_val = src_reg->u32_min_value; 15582 15583 if (src_known && dst_known) { 15584 __mark_reg32_known(dst_reg, var32_off.value); 15585 return; 15586 } 15587 15588 /* We get our maximum from the var_off, and our minimum is the 15589 * maximum of the operands' minima 15590 */ 15591 dst_reg->u32_min_value = max(dst_reg->u32_min_value, umin_val); 15592 dst_reg->u32_max_value = var32_off.value | var32_off.mask; 15593 15594 /* Safe to set s32 bounds by casting u32 result into s32 when u32 15595 * doesn't cross sign boundary. Otherwise set s32 bounds to unbounded. 
15596 */ 15597 if ((s32)dst_reg->u32_min_value <= (s32)dst_reg->u32_max_value) { 15598 dst_reg->s32_min_value = dst_reg->u32_min_value; 15599 dst_reg->s32_max_value = dst_reg->u32_max_value; 15600 } else { 15601 dst_reg->s32_min_value = S32_MIN; 15602 dst_reg->s32_max_value = S32_MAX; 15603 } 15604 } 15605 15606 static void scalar_min_max_or(struct bpf_reg_state *dst_reg, 15607 struct bpf_reg_state *src_reg) 15608 { 15609 bool src_known = tnum_is_const(src_reg->var_off); 15610 bool dst_known = tnum_is_const(dst_reg->var_off); 15611 u64 umin_val = src_reg->umin_value; 15612 15613 if (src_known && dst_known) { 15614 __mark_reg_known(dst_reg, dst_reg->var_off.value); 15615 return; 15616 } 15617 15618 /* We get our maximum from the var_off, and our minimum is the 15619 * maximum of the operands' minima 15620 */ 15621 dst_reg->umin_value = max(dst_reg->umin_value, umin_val); 15622 dst_reg->umax_value = dst_reg->var_off.value | dst_reg->var_off.mask; 15623 15624 /* Safe to set s64 bounds by casting u64 result into s64 when u64 15625 * doesn't cross sign boundary. Otherwise set s64 bounds to unbounded. 15626 */ 15627 if ((s64)dst_reg->umin_value <= (s64)dst_reg->umax_value) { 15628 dst_reg->smin_value = dst_reg->umin_value; 15629 dst_reg->smax_value = dst_reg->umax_value; 15630 } else { 15631 dst_reg->smin_value = S64_MIN; 15632 dst_reg->smax_value = S64_MAX; 15633 } 15634 /* We may learn something more from the var_off */ 15635 __update_reg_bounds(dst_reg); 15636 } 15637 15638 static void scalar32_min_max_xor(struct bpf_reg_state *dst_reg, 15639 struct bpf_reg_state *src_reg) 15640 { 15641 bool src_known = tnum_subreg_is_const(src_reg->var_off); 15642 bool dst_known = tnum_subreg_is_const(dst_reg->var_off); 15643 struct tnum var32_off = tnum_subreg(dst_reg->var_off); 15644 15645 if (src_known && dst_known) { 15646 __mark_reg32_known(dst_reg, var32_off.value); 15647 return; 15648 } 15649 15650 /* We get both minimum and maximum from the var32_off. 
*/ 15651 dst_reg->u32_min_value = var32_off.value; 15652 dst_reg->u32_max_value = var32_off.value | var32_off.mask; 15653 15654 /* Safe to set s32 bounds by casting u32 result into s32 when u32 15655 * doesn't cross sign boundary. Otherwise set s32 bounds to unbounded. 15656 */ 15657 if ((s32)dst_reg->u32_min_value <= (s32)dst_reg->u32_max_value) { 15658 dst_reg->s32_min_value = dst_reg->u32_min_value; 15659 dst_reg->s32_max_value = dst_reg->u32_max_value; 15660 } else { 15661 dst_reg->s32_min_value = S32_MIN; 15662 dst_reg->s32_max_value = S32_MAX; 15663 } 15664 } 15665 15666 static void scalar_min_max_xor(struct bpf_reg_state *dst_reg, 15667 struct bpf_reg_state *src_reg) 15668 { 15669 bool src_known = tnum_is_const(src_reg->var_off); 15670 bool dst_known = tnum_is_const(dst_reg->var_off); 15671 15672 if (src_known && dst_known) { 15673 /* dst_reg->var_off.value has been updated earlier */ 15674 __mark_reg_known(dst_reg, dst_reg->var_off.value); 15675 return; 15676 } 15677 15678 /* We get both minimum and maximum from the var_off. */ 15679 dst_reg->umin_value = dst_reg->var_off.value; 15680 dst_reg->umax_value = dst_reg->var_off.value | dst_reg->var_off.mask; 15681 15682 /* Safe to set s64 bounds by casting u64 result into s64 when u64 15683 * doesn't cross sign boundary. Otherwise set s64 bounds to unbounded. 
15684 */ 15685 if ((s64)dst_reg->umin_value <= (s64)dst_reg->umax_value) { 15686 dst_reg->smin_value = dst_reg->umin_value; 15687 dst_reg->smax_value = dst_reg->umax_value; 15688 } else { 15689 dst_reg->smin_value = S64_MIN; 15690 dst_reg->smax_value = S64_MAX; 15691 } 15692 15693 __update_reg_bounds(dst_reg); 15694 } 15695 15696 static void __scalar32_min_max_lsh(struct bpf_reg_state *dst_reg, 15697 u64 umin_val, u64 umax_val) 15698 { 15699 /* We lose all sign bit information (except what we can pick 15700 * up from var_off) 15701 */ 15702 dst_reg->s32_min_value = S32_MIN; 15703 dst_reg->s32_max_value = S32_MAX; 15704 /* If we might shift our top bit out, then we know nothing */ 15705 if (umax_val > 31 || dst_reg->u32_max_value > 1ULL << (31 - umax_val)) { 15706 dst_reg->u32_min_value = 0; 15707 dst_reg->u32_max_value = U32_MAX; 15708 } else { 15709 dst_reg->u32_min_value <<= umin_val; 15710 dst_reg->u32_max_value <<= umax_val; 15711 } 15712 } 15713 15714 static void scalar32_min_max_lsh(struct bpf_reg_state *dst_reg, 15715 struct bpf_reg_state *src_reg) 15716 { 15717 u32 umax_val = src_reg->u32_max_value; 15718 u32 umin_val = src_reg->u32_min_value; 15719 /* u32 alu operation will zext upper bits */ 15720 struct tnum subreg = tnum_subreg(dst_reg->var_off); 15721 15722 __scalar32_min_max_lsh(dst_reg, umin_val, umax_val); 15723 dst_reg->var_off = tnum_subreg(tnum_lshift(subreg, umin_val)); 15724 /* Not required but being careful mark reg64 bounds as unknown so 15725 * that we are forced to pick them up from tnum and zext later and 15726 * if some path skips this step we are still safe. 15727 */ 15728 __mark_reg64_unbounded(dst_reg); 15729 __update_reg32_bounds(dst_reg); 15730 } 15731 15732 static void __scalar64_min_max_lsh(struct bpf_reg_state *dst_reg, 15733 u64 umin_val, u64 umax_val) 15734 { 15735 /* Special case <<32 because it is a common compiler pattern to sign 15736 * extend subreg by doing <<32 s>>32. 
smin/smax assignments are correct 15737 * because s32 bounds don't flip sign when shifting to the left by 15738 * 32bits. 15739 */ 15740 if (umin_val == 32 && umax_val == 32) { 15741 dst_reg->smax_value = (s64)dst_reg->s32_max_value << 32; 15742 dst_reg->smin_value = (s64)dst_reg->s32_min_value << 32; 15743 } else { 15744 dst_reg->smax_value = S64_MAX; 15745 dst_reg->smin_value = S64_MIN; 15746 } 15747 15748 /* If we might shift our top bit out, then we know nothing */ 15749 if (dst_reg->umax_value > 1ULL << (63 - umax_val)) { 15750 dst_reg->umin_value = 0; 15751 dst_reg->umax_value = U64_MAX; 15752 } else { 15753 dst_reg->umin_value <<= umin_val; 15754 dst_reg->umax_value <<= umax_val; 15755 } 15756 } 15757 15758 static void scalar_min_max_lsh(struct bpf_reg_state *dst_reg, 15759 struct bpf_reg_state *src_reg) 15760 { 15761 u64 umax_val = src_reg->umax_value; 15762 u64 umin_val = src_reg->umin_value; 15763 15764 /* scalar64 calc uses 32bit unshifted bounds so must be called first */ 15765 __scalar64_min_max_lsh(dst_reg, umin_val, umax_val); 15766 __scalar32_min_max_lsh(dst_reg, umin_val, umax_val); 15767 15768 dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val); 15769 /* We may learn something more from the var_off */ 15770 __update_reg_bounds(dst_reg); 15771 } 15772 15773 static void scalar32_min_max_rsh(struct bpf_reg_state *dst_reg, 15774 struct bpf_reg_state *src_reg) 15775 { 15776 struct tnum subreg = tnum_subreg(dst_reg->var_off); 15777 u32 umax_val = src_reg->u32_max_value; 15778 u32 umin_val = src_reg->u32_min_value; 15779 15780 /* BPF_RSH is an unsigned shift. 
If the value in dst_reg might 15781 * be negative, then either: 15782 * 1) src_reg might be zero, so the sign bit of the result is 15783 * unknown, so we lose our signed bounds 15784 * 2) it's known negative, thus the unsigned bounds capture the 15785 * signed bounds 15786 * 3) the signed bounds cross zero, so they tell us nothing 15787 * about the result 15788 * If the value in dst_reg is known nonnegative, then again the 15789 * unsigned bounds capture the signed bounds. 15790 * Thus, in all cases it suffices to blow away our signed bounds 15791 * and rely on inferring new ones from the unsigned bounds and 15792 * var_off of the result. 15793 */ 15794 dst_reg->s32_min_value = S32_MIN; 15795 dst_reg->s32_max_value = S32_MAX; 15796 15797 dst_reg->var_off = tnum_rshift(subreg, umin_val); 15798 dst_reg->u32_min_value >>= umax_val; 15799 dst_reg->u32_max_value >>= umin_val; 15800 15801 __mark_reg64_unbounded(dst_reg); 15802 __update_reg32_bounds(dst_reg); 15803 } 15804 15805 static void scalar_min_max_rsh(struct bpf_reg_state *dst_reg, 15806 struct bpf_reg_state *src_reg) 15807 { 15808 u64 umax_val = src_reg->umax_value; 15809 u64 umin_val = src_reg->umin_value; 15810 15811 /* BPF_RSH is an unsigned shift. If the value in dst_reg might 15812 * be negative, then either: 15813 * 1) src_reg might be zero, so the sign bit of the result is 15814 * unknown, so we lose our signed bounds 15815 * 2) it's known negative, thus the unsigned bounds capture the 15816 * signed bounds 15817 * 3) the signed bounds cross zero, so they tell us nothing 15818 * about the result 15819 * If the value in dst_reg is known nonnegative, then again the 15820 * unsigned bounds capture the signed bounds. 15821 * Thus, in all cases it suffices to blow away our signed bounds 15822 * and rely on inferring new ones from the unsigned bounds and 15823 * var_off of the result. 
15824 */ 15825 dst_reg->smin_value = S64_MIN; 15826 dst_reg->smax_value = S64_MAX; 15827 dst_reg->var_off = tnum_rshift(dst_reg->var_off, umin_val); 15828 dst_reg->umin_value >>= umax_val; 15829 dst_reg->umax_value >>= umin_val; 15830 15831 /* Its not easy to operate on alu32 bounds here because it depends 15832 * on bits being shifted in. Take easy way out and mark unbounded 15833 * so we can recalculate later from tnum. 15834 */ 15835 __mark_reg32_unbounded(dst_reg); 15836 __update_reg_bounds(dst_reg); 15837 } 15838 15839 static void scalar32_min_max_arsh(struct bpf_reg_state *dst_reg, 15840 struct bpf_reg_state *src_reg) 15841 { 15842 u64 umin_val = src_reg->u32_min_value; 15843 15844 /* Upon reaching here, src_known is true and 15845 * umax_val is equal to umin_val. 15846 */ 15847 dst_reg->s32_min_value = (u32)(((s32)dst_reg->s32_min_value) >> umin_val); 15848 dst_reg->s32_max_value = (u32)(((s32)dst_reg->s32_max_value) >> umin_val); 15849 15850 dst_reg->var_off = tnum_arshift(tnum_subreg(dst_reg->var_off), umin_val, 32); 15851 15852 /* blow away the dst_reg umin_value/umax_value and rely on 15853 * dst_reg var_off to refine the result. 15854 */ 15855 dst_reg->u32_min_value = 0; 15856 dst_reg->u32_max_value = U32_MAX; 15857 15858 __mark_reg64_unbounded(dst_reg); 15859 __update_reg32_bounds(dst_reg); 15860 } 15861 15862 static void scalar_min_max_arsh(struct bpf_reg_state *dst_reg, 15863 struct bpf_reg_state *src_reg) 15864 { 15865 u64 umin_val = src_reg->umin_value; 15866 15867 /* Upon reaching here, src_known is true and umax_val is equal 15868 * to umin_val. 15869 */ 15870 dst_reg->smin_value >>= umin_val; 15871 dst_reg->smax_value >>= umin_val; 15872 15873 dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_val, 64); 15874 15875 /* blow away the dst_reg umin_value/umax_value and rely on 15876 * dst_reg var_off to refine the result. 
15877 */ 15878 dst_reg->umin_value = 0; 15879 dst_reg->umax_value = U64_MAX; 15880 15881 /* Its not easy to operate on alu32 bounds here because it depends 15882 * on bits being shifted in from upper 32-bits. Take easy way out 15883 * and mark unbounded so we can recalculate later from tnum. 15884 */ 15885 __mark_reg32_unbounded(dst_reg); 15886 __update_reg_bounds(dst_reg); 15887 } 15888 15889 static void scalar_byte_swap(struct bpf_reg_state *dst_reg, struct bpf_insn *insn) 15890 { 15891 /* 15892 * Byte swap operation - update var_off using tnum_bswap. 15893 * Three cases: 15894 * 1. bswap(16|32|64): opcode=0xd7 (BPF_END | BPF_ALU64 | BPF_TO_LE) 15895 * unconditional swap 15896 * 2. to_le(16|32|64): opcode=0xd4 (BPF_END | BPF_ALU | BPF_TO_LE) 15897 * swap on big-endian, truncation or no-op on little-endian 15898 * 3. to_be(16|32|64): opcode=0xdc (BPF_END | BPF_ALU | BPF_TO_BE) 15899 * swap on little-endian, truncation or no-op on big-endian 15900 */ 15901 15902 bool alu64 = BPF_CLASS(insn->code) == BPF_ALU64; 15903 bool to_le = BPF_SRC(insn->code) == BPF_TO_LE; 15904 bool is_big_endian; 15905 #ifdef CONFIG_CPU_BIG_ENDIAN 15906 is_big_endian = true; 15907 #else 15908 is_big_endian = false; 15909 #endif 15910 /* Apply bswap if alu64 or switch between big-endian and little-endian machines */ 15911 bool need_bswap = alu64 || (to_le == is_big_endian); 15912 15913 if (need_bswap) { 15914 if (insn->imm == 16) 15915 dst_reg->var_off = tnum_bswap16(dst_reg->var_off); 15916 else if (insn->imm == 32) 15917 dst_reg->var_off = tnum_bswap32(dst_reg->var_off); 15918 else if (insn->imm == 64) 15919 dst_reg->var_off = tnum_bswap64(dst_reg->var_off); 15920 /* 15921 * Byteswap scrambles the range, so we must reset bounds. 15922 * Bounds will be re-derived from the new tnum later. 
15923 */ 15924 __mark_reg_unbounded(dst_reg); 15925 } 15926 /* For bswap16/32, truncate dst register to match the swapped size */ 15927 if (insn->imm == 16 || insn->imm == 32) 15928 coerce_reg_to_size(dst_reg, insn->imm / 8); 15929 } 15930 15931 static bool is_safe_to_compute_dst_reg_range(struct bpf_insn *insn, 15932 const struct bpf_reg_state *src_reg) 15933 { 15934 bool src_is_const = false; 15935 u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32; 15936 15937 if (insn_bitness == 32) { 15938 if (tnum_subreg_is_const(src_reg->var_off) 15939 && src_reg->s32_min_value == src_reg->s32_max_value 15940 && src_reg->u32_min_value == src_reg->u32_max_value) 15941 src_is_const = true; 15942 } else { 15943 if (tnum_is_const(src_reg->var_off) 15944 && src_reg->smin_value == src_reg->smax_value 15945 && src_reg->umin_value == src_reg->umax_value) 15946 src_is_const = true; 15947 } 15948 15949 switch (BPF_OP(insn->code)) { 15950 case BPF_ADD: 15951 case BPF_SUB: 15952 case BPF_NEG: 15953 case BPF_AND: 15954 case BPF_XOR: 15955 case BPF_OR: 15956 case BPF_MUL: 15957 case BPF_END: 15958 return true; 15959 15960 /* 15961 * Division and modulo operators range is only safe to compute when the 15962 * divisor is a constant. 15963 */ 15964 case BPF_DIV: 15965 case BPF_MOD: 15966 return src_is_const; 15967 15968 /* Shift operators range is only computable if shift dimension operand 15969 * is a constant. Shifts greater than 31 or 63 are undefined. This 15970 * includes shifts by a negative number. 
15971 */ 15972 case BPF_LSH: 15973 case BPF_RSH: 15974 case BPF_ARSH: 15975 return (src_is_const && src_reg->umax_value < insn_bitness); 15976 default: 15977 return false; 15978 } 15979 } 15980 15981 static int maybe_fork_scalars(struct bpf_verifier_env *env, struct bpf_insn *insn, 15982 struct bpf_reg_state *dst_reg) 15983 { 15984 struct bpf_verifier_state *branch; 15985 struct bpf_reg_state *regs; 15986 bool alu32; 15987 15988 if (dst_reg->smin_value == -1 && dst_reg->smax_value == 0) 15989 alu32 = false; 15990 else if (dst_reg->s32_min_value == -1 && dst_reg->s32_max_value == 0) 15991 alu32 = true; 15992 else 15993 return 0; 15994 15995 branch = push_stack(env, env->insn_idx + 1, env->insn_idx, false); 15996 if (IS_ERR(branch)) 15997 return PTR_ERR(branch); 15998 15999 regs = branch->frame[branch->curframe]->regs; 16000 if (alu32) { 16001 __mark_reg32_known(®s[insn->dst_reg], 0); 16002 __mark_reg32_known(dst_reg, -1ull); 16003 } else { 16004 __mark_reg_known(®s[insn->dst_reg], 0); 16005 __mark_reg_known(dst_reg, -1ull); 16006 } 16007 return 0; 16008 } 16009 16010 /* WARNING: This function does calculations on 64-bit values, but the actual 16011 * execution may occur on 32-bit values. Therefore, things like bitshifts 16012 * need extra checks in the 32-bit case. 16013 */ 16014 static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, 16015 struct bpf_insn *insn, 16016 struct bpf_reg_state *dst_reg, 16017 struct bpf_reg_state src_reg) 16018 { 16019 u8 opcode = BPF_OP(insn->code); 16020 s16 off = insn->off; 16021 bool alu32 = (BPF_CLASS(insn->code) != BPF_ALU64); 16022 int ret; 16023 16024 if (!is_safe_to_compute_dst_reg_range(insn, &src_reg)) { 16025 __mark_reg_unknown(env, dst_reg); 16026 return 0; 16027 } 16028 16029 if (sanitize_needed(opcode)) { 16030 ret = sanitize_val_alu(env, insn); 16031 if (ret < 0) 16032 return sanitize_err(env, insn, ret, NULL, NULL); 16033 } 16034 16035 /* Calculate sign/unsigned bounds and tnum for alu32 and alu64 bit ops. 
16036 * There are two classes of instructions: The first class we track both 16037 * alu32 and alu64 sign/unsigned bounds independently this provides the 16038 * greatest amount of precision when alu operations are mixed with jmp32 16039 * operations. These operations are BPF_ADD, BPF_SUB, BPF_MUL, BPF_ADD, 16040 * and BPF_OR. This is possible because these ops have fairly easy to 16041 * understand and calculate behavior in both 32-bit and 64-bit alu ops. 16042 * See alu32 verifier tests for examples. The second class of 16043 * operations, BPF_LSH, BPF_RSH, and BPF_ARSH, however are not so easy 16044 * with regards to tracking sign/unsigned bounds because the bits may 16045 * cross subreg boundaries in the alu64 case. When this happens we mark 16046 * the reg unbounded in the subreg bound space and use the resulting 16047 * tnum to calculate an approximation of the sign/unsigned bounds. 16048 */ 16049 switch (opcode) { 16050 case BPF_ADD: 16051 scalar32_min_max_add(dst_reg, &src_reg); 16052 scalar_min_max_add(dst_reg, &src_reg); 16053 dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off); 16054 break; 16055 case BPF_SUB: 16056 scalar32_min_max_sub(dst_reg, &src_reg); 16057 scalar_min_max_sub(dst_reg, &src_reg); 16058 dst_reg->var_off = tnum_sub(dst_reg->var_off, src_reg.var_off); 16059 break; 16060 case BPF_NEG: 16061 env->fake_reg[0] = *dst_reg; 16062 __mark_reg_known(dst_reg, 0); 16063 scalar32_min_max_sub(dst_reg, &env->fake_reg[0]); 16064 scalar_min_max_sub(dst_reg, &env->fake_reg[0]); 16065 dst_reg->var_off = tnum_neg(env->fake_reg[0].var_off); 16066 break; 16067 case BPF_MUL: 16068 dst_reg->var_off = tnum_mul(dst_reg->var_off, src_reg.var_off); 16069 scalar32_min_max_mul(dst_reg, &src_reg); 16070 scalar_min_max_mul(dst_reg, &src_reg); 16071 break; 16072 case BPF_DIV: 16073 /* BPF div specification: x / 0 = 0 */ 16074 if ((alu32 && src_reg.u32_min_value == 0) || (!alu32 && src_reg.umin_value == 0)) { 16075 ___mark_reg_known(dst_reg, 0); 16076 break; 
16077 } 16078 if (alu32) 16079 if (off == 1) 16080 scalar32_min_max_sdiv(dst_reg, &src_reg); 16081 else 16082 scalar32_min_max_udiv(dst_reg, &src_reg); 16083 else 16084 if (off == 1) 16085 scalar_min_max_sdiv(dst_reg, &src_reg); 16086 else 16087 scalar_min_max_udiv(dst_reg, &src_reg); 16088 break; 16089 case BPF_MOD: 16090 /* BPF mod specification: x % 0 = x */ 16091 if ((alu32 && src_reg.u32_min_value == 0) || (!alu32 && src_reg.umin_value == 0)) 16092 break; 16093 if (alu32) 16094 if (off == 1) 16095 scalar32_min_max_smod(dst_reg, &src_reg); 16096 else 16097 scalar32_min_max_umod(dst_reg, &src_reg); 16098 else 16099 if (off == 1) 16100 scalar_min_max_smod(dst_reg, &src_reg); 16101 else 16102 scalar_min_max_umod(dst_reg, &src_reg); 16103 break; 16104 case BPF_AND: 16105 if (tnum_is_const(src_reg.var_off)) { 16106 ret = maybe_fork_scalars(env, insn, dst_reg); 16107 if (ret) 16108 return ret; 16109 } 16110 dst_reg->var_off = tnum_and(dst_reg->var_off, src_reg.var_off); 16111 scalar32_min_max_and(dst_reg, &src_reg); 16112 scalar_min_max_and(dst_reg, &src_reg); 16113 break; 16114 case BPF_OR: 16115 if (tnum_is_const(src_reg.var_off)) { 16116 ret = maybe_fork_scalars(env, insn, dst_reg); 16117 if (ret) 16118 return ret; 16119 } 16120 dst_reg->var_off = tnum_or(dst_reg->var_off, src_reg.var_off); 16121 scalar32_min_max_or(dst_reg, &src_reg); 16122 scalar_min_max_or(dst_reg, &src_reg); 16123 break; 16124 case BPF_XOR: 16125 dst_reg->var_off = tnum_xor(dst_reg->var_off, src_reg.var_off); 16126 scalar32_min_max_xor(dst_reg, &src_reg); 16127 scalar_min_max_xor(dst_reg, &src_reg); 16128 break; 16129 case BPF_LSH: 16130 if (alu32) 16131 scalar32_min_max_lsh(dst_reg, &src_reg); 16132 else 16133 scalar_min_max_lsh(dst_reg, &src_reg); 16134 break; 16135 case BPF_RSH: 16136 if (alu32) 16137 scalar32_min_max_rsh(dst_reg, &src_reg); 16138 else 16139 scalar_min_max_rsh(dst_reg, &src_reg); 16140 break; 16141 case BPF_ARSH: 16142 if (alu32) 16143 scalar32_min_max_arsh(dst_reg, 
&src_reg); 16144 else 16145 scalar_min_max_arsh(dst_reg, &src_reg); 16146 break; 16147 case BPF_END: 16148 scalar_byte_swap(dst_reg, insn); 16149 break; 16150 default: 16151 break; 16152 } 16153 16154 /* 16155 * ALU32 ops are zero extended into 64bit register. 16156 * 16157 * BPF_END is already handled inside the helper (truncation), 16158 * so skip zext here to avoid unexpected zero extension. 16159 * e.g., le64: opcode=(BPF_END|BPF_ALU|BPF_TO_LE), imm=0x40 16160 * This is a 64bit byte swap operation with alu32==true, 16161 * but we should not zero extend the result. 16162 */ 16163 if (alu32 && opcode != BPF_END) 16164 zext_32_to_64(dst_reg); 16165 reg_bounds_sync(dst_reg); 16166 return 0; 16167 } 16168 16169 /* Handles ALU ops other than BPF_END, BPF_NEG and BPF_MOV: computes new min/max 16170 * and var_off. 16171 */ 16172 static int adjust_reg_min_max_vals(struct bpf_verifier_env *env, 16173 struct bpf_insn *insn) 16174 { 16175 struct bpf_verifier_state *vstate = env->cur_state; 16176 struct bpf_func_state *state = vstate->frame[vstate->curframe]; 16177 struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg; 16178 struct bpf_reg_state *ptr_reg = NULL, off_reg = {0}; 16179 bool alu32 = (BPF_CLASS(insn->code) != BPF_ALU64); 16180 u8 opcode = BPF_OP(insn->code); 16181 int err; 16182 16183 dst_reg = ®s[insn->dst_reg]; 16184 src_reg = NULL; 16185 16186 if (dst_reg->type == PTR_TO_ARENA) { 16187 struct bpf_insn_aux_data *aux = cur_aux(env); 16188 16189 if (BPF_CLASS(insn->code) == BPF_ALU64) 16190 /* 16191 * 32-bit operations zero upper bits automatically. 16192 * 64-bit operations need to be converted to 32. 
16193 */ 16194 aux->needs_zext = true; 16195 16196 /* Any arithmetic operations are allowed on arena pointers */ 16197 return 0; 16198 } 16199 16200 if (dst_reg->type != SCALAR_VALUE) 16201 ptr_reg = dst_reg; 16202 16203 if (BPF_SRC(insn->code) == BPF_X) { 16204 src_reg = ®s[insn->src_reg]; 16205 if (src_reg->type != SCALAR_VALUE) { 16206 if (dst_reg->type != SCALAR_VALUE) { 16207 /* Combining two pointers by any ALU op yields 16208 * an arbitrary scalar. Disallow all math except 16209 * pointer subtraction 16210 */ 16211 if (opcode == BPF_SUB && env->allow_ptr_leaks) { 16212 mark_reg_unknown(env, regs, insn->dst_reg); 16213 return 0; 16214 } 16215 verbose(env, "R%d pointer %s pointer prohibited\n", 16216 insn->dst_reg, 16217 bpf_alu_string[opcode >> 4]); 16218 return -EACCES; 16219 } else { 16220 /* scalar += pointer 16221 * This is legal, but we have to reverse our 16222 * src/dest handling in computing the range 16223 */ 16224 err = mark_chain_precision(env, insn->dst_reg); 16225 if (err) 16226 return err; 16227 return adjust_ptr_min_max_vals(env, insn, 16228 src_reg, dst_reg); 16229 } 16230 } else if (ptr_reg) { 16231 /* pointer += scalar */ 16232 err = mark_chain_precision(env, insn->src_reg); 16233 if (err) 16234 return err; 16235 return adjust_ptr_min_max_vals(env, insn, 16236 dst_reg, src_reg); 16237 } else if (dst_reg->precise) { 16238 /* if dst_reg is precise, src_reg should be precise as well */ 16239 err = mark_chain_precision(env, insn->src_reg); 16240 if (err) 16241 return err; 16242 } 16243 } else { 16244 /* Pretend the src is a reg with a known value, since we only 16245 * need to be able to read from this state. 
16246 */ 16247 off_reg.type = SCALAR_VALUE; 16248 __mark_reg_known(&off_reg, insn->imm); 16249 src_reg = &off_reg; 16250 if (ptr_reg) /* pointer += K */ 16251 return adjust_ptr_min_max_vals(env, insn, 16252 ptr_reg, src_reg); 16253 } 16254 16255 /* Got here implies adding two SCALAR_VALUEs */ 16256 if (WARN_ON_ONCE(ptr_reg)) { 16257 print_verifier_state(env, vstate, vstate->curframe, true); 16258 verbose(env, "verifier internal error: unexpected ptr_reg\n"); 16259 return -EFAULT; 16260 } 16261 if (WARN_ON(!src_reg)) { 16262 print_verifier_state(env, vstate, vstate->curframe, true); 16263 verbose(env, "verifier internal error: no src_reg\n"); 16264 return -EFAULT; 16265 } 16266 /* 16267 * For alu32 linked register tracking, we need to check dst_reg's 16268 * umax_value before the ALU operation. After adjust_scalar_min_max_vals(), 16269 * alu32 ops will have zero-extended the result, making umax_value <= U32_MAX. 16270 */ 16271 u64 dst_umax = dst_reg->umax_value; 16272 16273 err = adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg); 16274 if (err) 16275 return err; 16276 /* 16277 * Compilers can generate the code 16278 * r1 = r2 16279 * r1 += 0x1 16280 * if r2 < 1000 goto ... 16281 * use r1 in memory access 16282 * So remember constant delta between r2 and r1 and update r1 after 16283 * 'if' condition. 
16284 */ 16285 if (env->bpf_capable && 16286 (BPF_OP(insn->code) == BPF_ADD || BPF_OP(insn->code) == BPF_SUB) && 16287 dst_reg->id && is_reg_const(src_reg, alu32)) { 16288 u64 val = reg_const_value(src_reg, alu32); 16289 s32 off; 16290 16291 if (!alu32 && ((s64)val < S32_MIN || (s64)val > S32_MAX)) 16292 goto clear_id; 16293 16294 if (alu32 && (dst_umax > U32_MAX)) 16295 goto clear_id; 16296 16297 off = (s32)val; 16298 16299 if (BPF_OP(insn->code) == BPF_SUB) { 16300 /* Negating S32_MIN would overflow */ 16301 if (off == S32_MIN) 16302 goto clear_id; 16303 off = -off; 16304 } 16305 16306 if (dst_reg->id & BPF_ADD_CONST) { 16307 /* 16308 * If the register already went through rX += val 16309 * we cannot accumulate another val into rx->off. 16310 */ 16311 clear_id: 16312 dst_reg->off = 0; 16313 dst_reg->id = 0; 16314 } else { 16315 if (alu32) 16316 dst_reg->id |= BPF_ADD_CONST32; 16317 else 16318 dst_reg->id |= BPF_ADD_CONST64; 16319 dst_reg->off = off; 16320 } 16321 } else { 16322 /* 16323 * Make sure ID is cleared otherwise dst_reg min/max could be 16324 * incorrectly propagated into other registers by sync_linked_regs() 16325 */ 16326 dst_reg->id = 0; 16327 } 16328 return 0; 16329 } 16330 16331 /* check validity of 32-bit and 64-bit arithmetic operations */ 16332 static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) 16333 { 16334 struct bpf_reg_state *regs = cur_regs(env); 16335 u8 opcode = BPF_OP(insn->code); 16336 int err; 16337 16338 if (opcode == BPF_END || opcode == BPF_NEG) { 16339 if (opcode == BPF_NEG) { 16340 if (BPF_SRC(insn->code) != BPF_K || 16341 insn->src_reg != BPF_REG_0 || 16342 insn->off != 0 || insn->imm != 0) { 16343 verbose(env, "BPF_NEG uses reserved fields\n"); 16344 return -EINVAL; 16345 } 16346 } else { 16347 if (insn->src_reg != BPF_REG_0 || insn->off != 0 || 16348 (insn->imm != 16 && insn->imm != 32 && insn->imm != 64) || 16349 (BPF_CLASS(insn->code) == BPF_ALU64 && 16350 BPF_SRC(insn->code) != BPF_TO_LE)) { 16351 
verbose(env, "BPF_END uses reserved fields\n"); 16352 return -EINVAL; 16353 } 16354 } 16355 16356 /* check src operand */ 16357 err = check_reg_arg(env, insn->dst_reg, SRC_OP); 16358 if (err) 16359 return err; 16360 16361 if (is_pointer_value(env, insn->dst_reg)) { 16362 verbose(env, "R%d pointer arithmetic prohibited\n", 16363 insn->dst_reg); 16364 return -EACCES; 16365 } 16366 16367 /* check dest operand */ 16368 if ((opcode == BPF_NEG || opcode == BPF_END) && 16369 regs[insn->dst_reg].type == SCALAR_VALUE) { 16370 err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK); 16371 err = err ?: adjust_scalar_min_max_vals(env, insn, 16372 ®s[insn->dst_reg], 16373 regs[insn->dst_reg]); 16374 } else { 16375 err = check_reg_arg(env, insn->dst_reg, DST_OP); 16376 } 16377 if (err) 16378 return err; 16379 16380 } else if (opcode == BPF_MOV) { 16381 16382 if (BPF_SRC(insn->code) == BPF_X) { 16383 if (BPF_CLASS(insn->code) == BPF_ALU) { 16384 if ((insn->off != 0 && insn->off != 8 && insn->off != 16) || 16385 insn->imm) { 16386 verbose(env, "BPF_MOV uses reserved fields\n"); 16387 return -EINVAL; 16388 } 16389 } else if (insn->off == BPF_ADDR_SPACE_CAST) { 16390 if (insn->imm != 1 && insn->imm != 1u << 16) { 16391 verbose(env, "addr_space_cast insn can only convert between address space 1 and 0\n"); 16392 return -EINVAL; 16393 } 16394 if (!env->prog->aux->arena) { 16395 verbose(env, "addr_space_cast insn can only be used in a program that has an associated arena\n"); 16396 return -EINVAL; 16397 } 16398 } else { 16399 if ((insn->off != 0 && insn->off != 8 && insn->off != 16 && 16400 insn->off != 32) || insn->imm) { 16401 verbose(env, "BPF_MOV uses reserved fields\n"); 16402 return -EINVAL; 16403 } 16404 } 16405 16406 /* check src operand */ 16407 err = check_reg_arg(env, insn->src_reg, SRC_OP); 16408 if (err) 16409 return err; 16410 } else { 16411 if (insn->src_reg != BPF_REG_0 || insn->off != 0) { 16412 verbose(env, "BPF_MOV uses reserved fields\n"); 16413 return -EINVAL; 16414 
} 16415 } 16416 16417 /* check dest operand, mark as required later */ 16418 err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK); 16419 if (err) 16420 return err; 16421 16422 if (BPF_SRC(insn->code) == BPF_X) { 16423 struct bpf_reg_state *src_reg = regs + insn->src_reg; 16424 struct bpf_reg_state *dst_reg = regs + insn->dst_reg; 16425 16426 if (BPF_CLASS(insn->code) == BPF_ALU64) { 16427 if (insn->imm) { 16428 /* off == BPF_ADDR_SPACE_CAST */ 16429 mark_reg_unknown(env, regs, insn->dst_reg); 16430 if (insn->imm == 1) { /* cast from as(1) to as(0) */ 16431 dst_reg->type = PTR_TO_ARENA; 16432 /* PTR_TO_ARENA is 32-bit */ 16433 dst_reg->subreg_def = env->insn_idx + 1; 16434 } 16435 } else if (insn->off == 0) { 16436 /* case: R1 = R2 16437 * copy register state to dest reg 16438 */ 16439 assign_scalar_id_before_mov(env, src_reg); 16440 copy_register_state(dst_reg, src_reg); 16441 dst_reg->subreg_def = DEF_NOT_SUBREG; 16442 } else { 16443 /* case: R1 = (s8, s16 s32)R2 */ 16444 if (is_pointer_value(env, insn->src_reg)) { 16445 verbose(env, 16446 "R%d sign-extension part of pointer\n", 16447 insn->src_reg); 16448 return -EACCES; 16449 } else if (src_reg->type == SCALAR_VALUE) { 16450 bool no_sext; 16451 16452 no_sext = src_reg->umax_value < (1ULL << (insn->off - 1)); 16453 if (no_sext) 16454 assign_scalar_id_before_mov(env, src_reg); 16455 copy_register_state(dst_reg, src_reg); 16456 if (!no_sext) 16457 dst_reg->id = 0; 16458 coerce_reg_to_size_sx(dst_reg, insn->off >> 3); 16459 dst_reg->subreg_def = DEF_NOT_SUBREG; 16460 } else { 16461 mark_reg_unknown(env, regs, insn->dst_reg); 16462 } 16463 } 16464 } else { 16465 /* R1 = (u32) R2 */ 16466 if (is_pointer_value(env, insn->src_reg)) { 16467 verbose(env, 16468 "R%d partial copy of pointer\n", 16469 insn->src_reg); 16470 return -EACCES; 16471 } else if (src_reg->type == SCALAR_VALUE) { 16472 if (insn->off == 0) { 16473 bool is_src_reg_u32 = get_reg_width(src_reg) <= 32; 16474 16475 if (is_src_reg_u32) 16476 
assign_scalar_id_before_mov(env, src_reg); 16477 copy_register_state(dst_reg, src_reg); 16478 /* Make sure ID is cleared if src_reg is not in u32 16479 * range otherwise dst_reg min/max could be incorrectly 16480 * propagated into src_reg by sync_linked_regs() 16481 */ 16482 if (!is_src_reg_u32) 16483 dst_reg->id = 0; 16484 dst_reg->subreg_def = env->insn_idx + 1; 16485 } else { 16486 /* case: W1 = (s8, s16)W2 */ 16487 bool no_sext = src_reg->umax_value < (1ULL << (insn->off - 1)); 16488 16489 if (no_sext) 16490 assign_scalar_id_before_mov(env, src_reg); 16491 copy_register_state(dst_reg, src_reg); 16492 if (!no_sext) 16493 dst_reg->id = 0; 16494 dst_reg->subreg_def = env->insn_idx + 1; 16495 coerce_subreg_to_size_sx(dst_reg, insn->off >> 3); 16496 } 16497 } else { 16498 mark_reg_unknown(env, regs, 16499 insn->dst_reg); 16500 } 16501 zext_32_to_64(dst_reg); 16502 reg_bounds_sync(dst_reg); 16503 } 16504 } else { 16505 /* case: R = imm 16506 * remember the value we stored into this reg 16507 */ 16508 /* clear any state __mark_reg_known doesn't set */ 16509 mark_reg_unknown(env, regs, insn->dst_reg); 16510 regs[insn->dst_reg].type = SCALAR_VALUE; 16511 if (BPF_CLASS(insn->code) == BPF_ALU64) { 16512 __mark_reg_known(regs + insn->dst_reg, 16513 insn->imm); 16514 } else { 16515 __mark_reg_known(regs + insn->dst_reg, 16516 (u32)insn->imm); 16517 } 16518 } 16519 16520 } else if (opcode > BPF_END) { 16521 verbose(env, "invalid BPF_ALU opcode %x\n", opcode); 16522 return -EINVAL; 16523 16524 } else { /* all other ALU ops: and, sub, xor, add, ... 
*/ 16525 16526 if (BPF_SRC(insn->code) == BPF_X) { 16527 if (insn->imm != 0 || (insn->off != 0 && insn->off != 1) || 16528 (insn->off == 1 && opcode != BPF_MOD && opcode != BPF_DIV)) { 16529 verbose(env, "BPF_ALU uses reserved fields\n"); 16530 return -EINVAL; 16531 } 16532 /* check src1 operand */ 16533 err = check_reg_arg(env, insn->src_reg, SRC_OP); 16534 if (err) 16535 return err; 16536 } else { 16537 if (insn->src_reg != BPF_REG_0 || (insn->off != 0 && insn->off != 1) || 16538 (insn->off == 1 && opcode != BPF_MOD && opcode != BPF_DIV)) { 16539 verbose(env, "BPF_ALU uses reserved fields\n"); 16540 return -EINVAL; 16541 } 16542 } 16543 16544 /* check src2 operand */ 16545 err = check_reg_arg(env, insn->dst_reg, SRC_OP); 16546 if (err) 16547 return err; 16548 16549 if ((opcode == BPF_MOD || opcode == BPF_DIV) && 16550 BPF_SRC(insn->code) == BPF_K && insn->imm == 0) { 16551 verbose(env, "div by zero\n"); 16552 return -EINVAL; 16553 } 16554 16555 if ((opcode == BPF_LSH || opcode == BPF_RSH || 16556 opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) { 16557 int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 
64 : 32; 16558 16559 if (insn->imm < 0 || insn->imm >= size) { 16560 verbose(env, "invalid shift %d\n", insn->imm); 16561 return -EINVAL; 16562 } 16563 } 16564 16565 /* check dest operand */ 16566 err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK); 16567 err = err ?: adjust_reg_min_max_vals(env, insn); 16568 if (err) 16569 return err; 16570 } 16571 16572 return reg_bounds_sanity_check(env, ®s[insn->dst_reg], "alu"); 16573 } 16574 16575 static void find_good_pkt_pointers(struct bpf_verifier_state *vstate, 16576 struct bpf_reg_state *dst_reg, 16577 enum bpf_reg_type type, 16578 bool range_right_open) 16579 { 16580 struct bpf_func_state *state; 16581 struct bpf_reg_state *reg; 16582 int new_range; 16583 16584 if (dst_reg->off < 0 || 16585 (dst_reg->off == 0 && range_right_open)) 16586 /* This doesn't give us any range */ 16587 return; 16588 16589 if (dst_reg->umax_value > MAX_PACKET_OFF || 16590 dst_reg->umax_value + dst_reg->off > MAX_PACKET_OFF) 16591 /* Risk of overflow. For instance, ptr + (1<<63) may be less 16592 * than pkt_end, but that's because it's also less than pkt. 
*/
		return;

	new_range = dst_reg->off;
	if (range_right_open)
		new_range++;

	/* Examples for register markings:
	 *
	 * pkt_data in dst register:
	 *
	 *   r2 = r3;
	 *   r2 += 8;
	 *   if (r2 > pkt_end) goto <handle exception>
	 *   <access okay>
	 *
	 *   r2 = r3;
	 *   r2 += 8;
	 *   if (r2 < pkt_end) goto <access okay>
	 *   <handle exception>
	 *
	 *   Where:
	 *     r2 == dst_reg, pkt_end == src_reg
	 *     r2=pkt(id=n,off=8,r=0)
	 *     r3=pkt(id=n,off=0,r=0)
	 *
	 * pkt_data in src register:
	 *
	 *   r2 = r3;
	 *   r2 += 8;
	 *   if (pkt_end >= r2) goto <access okay>
	 *   <handle exception>
	 *
	 *   r2 = r3;
	 *   r2 += 8;
	 *   if (pkt_end <= r2) goto <handle exception>
	 *   <access okay>
	 *
	 *   Where:
	 *     pkt_end == dst_reg, r2 == src_reg
	 *     r2=pkt(id=n,off=8,r=0)
	 *     r3=pkt(id=n,off=0,r=0)
	 *
	 * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
	 * or r3=pkt(id=n,off=0,r=8-1), so that range of bytes [r3, r3 + 8)
	 * and [r3, r3 + 8-1) respectively is safe to access depending on
	 * the check.
	 */

	/* If our ids match, then we must have the same max_value. And we
	 * don't care about the other reg's fixed offset, since if it's too big
	 * the range won't allow anything.
	 * dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16.
	 */
	bpf_for_each_reg_in_vstate(vstate, state, reg, ({
		if (reg->type == type && reg->id == dst_reg->id)
			/* keep the maximum range already checked */
			reg->range = max(reg->range, new_range);
	}));
}

/*
 * Predict the outcome of "<reg1> <op> <reg2>" from the tracked tnum and the
 * unsigned/signed bounds of both operands. When is_jmp32 is set, the 32-bit
 * subregister tnum and the {u,s}32 bounds are used instead of the 64-bit ones.
 *
 * Returns 1 if the condition holds for every value the tracked state allows,
 * 0 if it holds for none of them, and -1 if the state cannot decide.
 *
 * NOTE(review): this comment used to read "currently assuming reg2 is a
 * constant". Only the BPF_JSET case below insists on a constant operand; the
 * other cases compare the ranges of both registers, so the old remark looks
 * stale - confirm.
 */
static int is_scalar_branch_taken(struct bpf_reg_state *reg1, struct bpf_reg_state *reg2,
				  u8 opcode, bool is_jmp32)
{
	struct tnum t1 = is_jmp32 ? tnum_subreg(reg1->var_off) : reg1->var_off;
	struct tnum t2 = is_jmp32 ? tnum_subreg(reg2->var_off) : reg2->var_off;
	u64 umin1 = is_jmp32 ? (u64)reg1->u32_min_value : reg1->umin_value;
	u64 umax1 = is_jmp32 ? (u64)reg1->u32_max_value : reg1->umax_value;
	s64 smin1 = is_jmp32 ? (s64)reg1->s32_min_value : reg1->smin_value;
	s64 smax1 = is_jmp32 ? (s64)reg1->s32_max_value : reg1->smax_value;
	u64 umin2 = is_jmp32 ? (u64)reg2->u32_min_value : reg2->umin_value;
	u64 umax2 = is_jmp32 ? (u64)reg2->u32_max_value : reg2->umax_value;
	s64 smin2 = is_jmp32 ? (s64)reg2->s32_min_value : reg2->smin_value;
	s64 smax2 = is_jmp32 ? (s64)reg2->s32_max_value : reg2->smax_value;

	/* Both operands are the very same register state: the reflexive
	 * comparisons are decided without looking at any bounds.
	 */
	if (reg1 == reg2) {
		switch (opcode) {
		case BPF_JGE:
		case BPF_JLE:
		case BPF_JSGE:
		case BPF_JSLE:
		case BPF_JEQ:
			return 1;
		case BPF_JGT:
		case BPF_JLT:
		case BPF_JSGT:
		case BPF_JSLT:
		case BPF_JNE:
			return 0;
		case BPF_JSET:
			/* "r & r" is non-zero exactly when r is non-zero; for a
			 * non-constant r this stays unknown only while the
			 * signed range still includes 0.
			 */
			if (tnum_is_const(t1))
				return t1.value != 0;
			else
				return (smin1 <= 0 && smax1 >= 0) ? -1 : 1;
		default:
			return -1;
		}
	}

	switch (opcode) {
	case BPF_JEQ:
		/* constants, umin/umax and smin/smax checks would be
		 * redundant in this case because they all should match
		 */
		if (tnum_is_const(t1) && tnum_is_const(t2))
			return t1.value == t2.value;
		if (!tnum_overlap(t1, t2))
			return 0;
		/* non-overlapping ranges */
		if (umin1 > umax2 || umax1 < umin2)
			return 0;
		if (smin1 > smax2 || smax1 < smin2)
			return 0;
		if (!is_jmp32) {
			/* if 64-bit ranges are inconclusive, see if we can
			 * utilize 32-bit subrange knowledge to eliminate
			 * branches that can't be taken a priori
			 */
			if (reg1->u32_min_value > reg2->u32_max_value ||
			    reg1->u32_max_value < reg2->u32_min_value)
				return 0;
			if (reg1->s32_min_value > reg2->s32_max_value ||
			    reg1->s32_max_value < reg2->s32_min_value)
				return 0;
		}
		break;
	case BPF_JNE:
		/* constants, umin/umax and smin/smax checks would be
		 * redundant in this case because they all should match
		 */
		if (tnum_is_const(t1) && tnum_is_const(t2))
			return t1.value != t2.value;
		if (!tnum_overlap(t1, t2))
			return 1;
		/* non-overlapping ranges */
		if (umin1 > umax2 || umax1 < umin2)
			return 1;
		if (smin1 > smax2 || smax1 < smin2)
			return 1;
		if (!is_jmp32) {
			/* if 64-bit ranges are inconclusive, see if we can
			 * utilize 32-bit subrange knowledge to eliminate
			 * branches that can't be taken a priori
			 */
			if (reg1->u32_min_value > reg2->u32_max_value ||
			    reg1->u32_max_value < reg2->u32_min_value)
				return 1;
			if (reg1->s32_min_value > reg2->s32_max_value ||
			    reg1->s32_max_value < reg2->s32_min_value)
				return 1;
		}
		break;
	case BPF_JSET:
		/* Bring the constant operand (if any) into reg2/t2; only the
		 * local pointers and tnum copies are swapped.
		 */
		if (!is_reg_const(reg2, is_jmp32)) {
			swap(reg1, reg2);
			swap(t1, t2);
		}
		if (!is_reg_const(reg2, is_jmp32))
			return -1;
		if ((~t1.mask & t1.value) & t2.value)
			return 1;
		if (!((t1.mask | t1.value) & t2.value))
			return 0;
		break;
	case BPF_JGT:
		if (umin1 > umax2)
			return 1;
		else if (umax1 <= umin2)
			return 0;
		break;
	case BPF_JSGT:
		if (smin1 > smax2)
			return 1;
		else if (smax1 <= smin2)
			return 0;
		break;
	case BPF_JLT:
		if (umax1 < umin2)
			return 1;
		else if (umin1 >= umax2)
			return 0;
		break;
	case BPF_JSLT:
		if (smax1 < smin2)
			return 1;
		else if (smin1 >= smax2)
			return 0;
		break;
	case BPF_JGE:
		if (umin1 >= umax2)
			return 1;
		else if (umax1 < umin2)
			return 0;
		break;
	case BPF_JSGE:
		if (smin1 >= smax2)
			return 1;
		else if (smax1 < smin2)
			return 0;
		break;
	case BPF_JLE:
		if (umax1 <= umin2)
			return 1;
		else if (umin1 > umax2)
			return 0;
		break;
	case BPF_JSLE:
		if (smax1 <= smin2)
			return 1;
		else if (smin1 > smax2)
			return 0;
		break;
	}

	return -1;
}

/* Return the jump opcode that expresses "b <op'> a" for a given "a <op> b".
 * The table is indexed by opcode >> 4 and has 16 entries; entries that are
 * not listed below are zero-initialized, so such opcodes map to 0.
 * NOTE(review): there is no bounds check on opcode >> 4 - presumably callers
 * only pass BPF_OP() values; confirm.
 */
static int flip_opcode(u32 opcode)
{
	/* How can we transform "a <op> b" into "b <op> a"? */
	static const u8 opcode_flip[16] = {
		/* these stay the same */
		[BPF_JEQ  >> 4] = BPF_JEQ,
		[BPF_JNE  >> 4] = BPF_JNE,
		[BPF_JSET >> 4] = BPF_JSET,
		/* these swap "lesser" and "greater" (L and G in the opcodes) */
		[BPF_JGE  >> 4] = BPF_JLE,
		[BPF_JGT  >> 4] = BPF_JLT,
		[BPF_JLE  >> 4] = BPF_JGE,
		[BPF_JLT  >> 4] = BPF_JGT,
		[BPF_JSGE >> 4] = BPF_JSLE,
		[BPF_JSGT >> 4] = BPF_JSLT,
		[BPF_JSLE >> 4] = BPF_JSGE,
		[BPF_JSLT >> 4] = BPF_JSGT
	};
	return opcode_flip[opcode >> 4];
}

/* Predict a comparison in which one operand has type PTR_TO_PACKET_END.
 * The other operand is taken as the packet pointer; when pkt_end sits in
 * dst_reg the opcode is mirrored so the switch below always reads as
 * "pkt <op> pkt_end".
 *
 * Returns 1 (taken), 0 (not taken) or -1 (unknown). A decision is only made
 * when pkt->range is negative and equals BEYOND_PKT_END or AT_PKT_END; a
 * non-negative range, or neither operand being PTR_TO_PACKET_END, yields -1.
 */
static int is_pkt_ptr_branch_taken(struct bpf_reg_state *dst_reg,
				   struct bpf_reg_state *src_reg,
				   u8 opcode)
{
	struct bpf_reg_state *pkt;

	if (src_reg->type == PTR_TO_PACKET_END) {
		pkt = dst_reg;
	} else if (dst_reg->type == PTR_TO_PACKET_END) {
		pkt = src_reg;
		opcode = flip_opcode(opcode);
	} else {
		return -1;
	}

	if (pkt->range >= 0)
		return -1;

	switch (opcode) {
	case BPF_JLE:
		/* pkt <= pkt_end */
		fallthrough;
	case BPF_JGT:
		/* pkt > pkt_end */
		if (pkt->range == BEYOND_PKT_END)
			/* pkt has at least one extra byte beyond pkt_end */
			return opcode == BPF_JGT;
		break;
	case BPF_JLT:
		/* pkt < pkt_end */
		fallthrough;
	case BPF_JGE:
		/* pkt >= pkt_end */
		if (pkt->range == BEYOND_PKT_END || pkt->range == AT_PKT_END)
			return opcode == BPF_JGE;
		break;
	}
	return -1;
}

/* compute branch direction of the expression "if (<reg1> opcode <reg2>) goto target;"
 * and return:
 *  1 - branch will be taken and "goto target" will be executed
 *  0 - branch will not be taken and fall-through to next insn
 * -1 - unknown.
Example: "if (reg1 < 5)" is unknown when register value
 *	  range [0,10]
 */
static int is_branch_taken(struct bpf_reg_state *reg1, struct bpf_reg_state *reg2,
			   u8 opcode, bool is_jmp32)
{
	/* Two packet pointers compared at full width have dedicated logic. */
	if (reg_is_pkt_pointer_any(reg1) && reg_is_pkt_pointer_any(reg2) && !is_jmp32)
		return is_pkt_ptr_branch_taken(reg1, reg2, opcode);

	/* Neither operand is a pointer: two scalars, not necessarily
	 * constants, are handled by the range-based predictor.
	 */
	if (!__is_pointer_value(false, reg1) && !__is_pointer_value(false, reg2))
		return is_scalar_branch_taken(reg1, reg2, opcode, is_jmp32);

	/* At least one operand is a pointer from here on. Arrange that reg2
	 * is the constant scalar and reg1 the pointer, mirroring the opcode.
	 */
	if (!is_reg_const(reg2, is_jmp32)) {
		opcode = flip_opcode(opcode);
		swap(reg1, reg2);
	}

	/* Without a constant on one side nothing can be predicted. */
	if (!is_reg_const(reg2, is_jmp32))
		return -1;

	/* The pointer must be known non-NULL ... */
	if (!reg_not_null(reg1))
		return -1;

	/* ... and the constant must be zero: a valid pointer never equals
	 * zero, which is the only fact used to direct the branch.
	 */
	if (reg_const_value(reg2, is_jmp32) != 0)
		return -1;

	if (opcode == BPF_JEQ)
		return 0;
	if (opcode == BPF_JNE)
		return 1;
	return -1;
}

/* Opcode that corresponds to a *false* branch condition.
* E.g., if r1 < r2, then reverse (false) condition is r1 >= r2
 *
 * Opcodes without an entry in the switch below return 0.
 */
static u8 rev_opcode(u8 opcode)
{
	switch (opcode) {
	case BPF_JEQ:		return BPF_JNE;
	case BPF_JNE:		return BPF_JEQ;
	/* JSET doesn't have its reverse opcode in BPF, so add
	 * BPF_X flag to denote the reverse of that operation
	 */
	case BPF_JSET:		return BPF_JSET | BPF_X;
	case BPF_JSET | BPF_X:	return BPF_JSET;
	case BPF_JGE:		return BPF_JLT;
	case BPF_JGT:		return BPF_JLE;
	case BPF_JLE:		return BPF_JGT;
	case BPF_JLT:		return BPF_JGE;
	case BPF_JSGE:		return BPF_JSLT;
	case BPF_JSGT:		return BPF_JSLE;
	case BPF_JSLE:		return BPF_JSGT;
	case BPF_JSLT:		return BPF_JSGE;
	default:		return 0;
	}
}

/* Refine range knowledge for <reg1> <op> <reg2> conditional operation.
 *
 * Assuming the condition holds, tighten the bounds and/or var_off of reg1
 * and reg2 in place. With is_jmp32 set only the 32-bit bounds and the
 * subregister part of var_off are touched. @opcode may also be
 * BPF_JSET | BPF_X, the pseudo-opcode rev_opcode() uses for "not JSET".
 * Opcodes that match no case leave both registers unchanged.
 *
 * This function only narrows individual fields; it does not call
 * reg_bounds_sync() itself.
 */
static void regs_refine_cond_op(struct bpf_reg_state *reg1, struct bpf_reg_state *reg2,
				u8 opcode, bool is_jmp32)
{
	struct tnum t;
	u64 val;

	/* In case of GE/GT/SGE/SGT, reuse LE/LT/SLE/SLT logic from below */
	switch (opcode) {
	case BPF_JGE:
	case BPF_JGT:
	case BPF_JSGE:
	case BPF_JSGT:
		opcode = flip_opcode(opcode);
		swap(reg1, reg2);
		break;
	default:
		break;
	}

	switch (opcode) {
	case BPF_JEQ:
		/* Equal values: both registers end up with the intersection
		 * of their ranges and of their tnums.
		 */
		if (is_jmp32) {
			reg1->u32_min_value = max(reg1->u32_min_value, reg2->u32_min_value);
			reg1->u32_max_value = min(reg1->u32_max_value, reg2->u32_max_value);
			reg1->s32_min_value = max(reg1->s32_min_value, reg2->s32_min_value);
			reg1->s32_max_value = min(reg1->s32_max_value, reg2->s32_max_value);
			reg2->u32_min_value = reg1->u32_min_value;
			reg2->u32_max_value = reg1->u32_max_value;
			reg2->s32_min_value = reg1->s32_min_value;
			reg2->s32_max_value = reg1->s32_max_value;

			t = tnum_intersect(tnum_subreg(reg1->var_off), tnum_subreg(reg2->var_off));
			reg1->var_off = tnum_with_subreg(reg1->var_off, t);
			reg2->var_off = tnum_with_subreg(reg2->var_off, t);
		} else {
			reg1->umin_value = max(reg1->umin_value, reg2->umin_value);
			reg1->umax_value = min(reg1->umax_value, reg2->umax_value);
			reg1->smin_value = max(reg1->smin_value, reg2->smin_value);
			reg1->smax_value = min(reg1->smax_value, reg2->smax_value);
			reg2->umin_value = reg1->umin_value;
			reg2->umax_value = reg1->umax_value;
			reg2->smin_value = reg1->smin_value;
			reg2->smax_value = reg1->smax_value;

			reg1->var_off = tnum_intersect(reg1->var_off, reg2->var_off);
			reg2->var_off = reg1->var_off;
		}
		break;
	case BPF_JNE:
		/* Only "reg != const" is exploited; put the constant in reg2. */
		if (!is_reg_const(reg2, is_jmp32))
			swap(reg1, reg2);
		if (!is_reg_const(reg2, is_jmp32))
			break;

		/* try to recompute the bound of reg1 if reg2 is a const and
		 * is exactly the edge of reg1.
		 */
		val = reg_const_value(reg2, is_jmp32);
		if (is_jmp32) {
			/* u32_min_value is not equal to 0xffffffff at this point,
			 * because otherwise u32_max_value is 0xffffffff as well,
			 * in such a case both reg1 and reg2 would be constants,
			 * jump would be predicted and reg_set_min_max() won't
			 * be called.
			 *
			 * Same reasoning works for all {u,s}{min,max}{32,64} cases
			 * below.
			 */
			if (reg1->u32_min_value == (u32)val)
				reg1->u32_min_value++;
			if (reg1->u32_max_value == (u32)val)
				reg1->u32_max_value--;
			if (reg1->s32_min_value == (s32)val)
				reg1->s32_min_value++;
			if (reg1->s32_max_value == (s32)val)
				reg1->s32_max_value--;
		} else {
			if (reg1->umin_value == (u64)val)
				reg1->umin_value++;
			if (reg1->umax_value == (u64)val)
				reg1->umax_value--;
			if (reg1->smin_value == (s64)val)
				reg1->smin_value++;
			if (reg1->smax_value == (s64)val)
				reg1->smax_value--;
		}
		break;
	case BPF_JSET:
		if (!is_reg_const(reg2, is_jmp32))
			swap(reg1, reg2);
		if (!is_reg_const(reg2, is_jmp32))
			break;
		val = reg_const_value(reg2, is_jmp32);
		/* BPF_JSET (i.e., TRUE branch, *not* BPF_JSET | BPF_X)
		 * requires single bit to learn something useful. E.g., if we
		 * know that `r1 & 0x3` is true, then which bits (0, 1, or both)
		 * are actually set? We can learn something definite only if
		 * it's a single-bit value to begin with.
		 *
		 * BPF_JSET | BPF_X (i.e., negation of BPF_JSET) doesn't have
		 * this restriction. I.e., !(r1 & 0x3) means neither bit 0 nor
		 * bit 1 is set, which we can readily use in adjustments.
		 */
		if (!is_power_of_2(val))
			break;
		if (is_jmp32) {
			t = tnum_or(tnum_subreg(reg1->var_off), tnum_const(val));
			reg1->var_off = tnum_with_subreg(reg1->var_off, t);
		} else {
			reg1->var_off = tnum_or(reg1->var_off, tnum_const(val));
		}
		break;
	case BPF_JSET | BPF_X: /* reverse of BPF_JSET, see rev_opcode() */
		if (!is_reg_const(reg2, is_jmp32))
			swap(reg1, reg2);
		if (!is_reg_const(reg2, is_jmp32))
			break;
		val = reg_const_value(reg2, is_jmp32);
		/* Forget the ranges before narrowing tnums, to avoid invariant
		 * violations if we're on a dead branch.
		 */
		__mark_reg_unbounded(reg1);
		if (is_jmp32) {
			t = tnum_and(tnum_subreg(reg1->var_off), tnum_const(~val));
			reg1->var_off = tnum_with_subreg(reg1->var_off, t);
		} else {
			reg1->var_off = tnum_and(reg1->var_off, tnum_const(~val));
		}
		break;
	case BPF_JLE:
		/* reg1 <= reg2: cap reg1's max by reg2's max and raise reg2's
		 * min to reg1's min (unsigned).
		 */
		if (is_jmp32) {
			reg1->u32_max_value = min(reg1->u32_max_value, reg2->u32_max_value);
			reg2->u32_min_value = max(reg1->u32_min_value, reg2->u32_min_value);
		} else {
			reg1->umax_value = min(reg1->umax_value, reg2->umax_value);
			reg2->umin_value = max(reg1->umin_value, reg2->umin_value);
		}
		break;
	case BPF_JLT:
		/* reg1 < reg2: as BPF_JLE, but strict by one (unsigned). */
		if (is_jmp32) {
			reg1->u32_max_value = min(reg1->u32_max_value, reg2->u32_max_value - 1);
			reg2->u32_min_value = max(reg1->u32_min_value + 1, reg2->u32_min_value);
		} else {
			reg1->umax_value = min(reg1->umax_value, reg2->umax_value - 1);
			reg2->umin_value = max(reg1->umin_value + 1, reg2->umin_value);
		}
		break;
	case BPF_JSLE:
		/* Signed counterpart of BPF_JLE. */
		if (is_jmp32) {
			reg1->s32_max_value = min(reg1->s32_max_value, reg2->s32_max_value);
			reg2->s32_min_value = max(reg1->s32_min_value, reg2->s32_min_value);
		} else {
			reg1->smax_value = min(reg1->smax_value, reg2->smax_value);
			reg2->smin_value = max(reg1->smin_value, reg2->smin_value);
		}
		break;
	case BPF_JSLT:
		/* Signed counterpart of BPF_JLT. */
		if (is_jmp32) {
			reg1->s32_max_value = min(reg1->s32_max_value, reg2->s32_max_value - 1);
			reg2->s32_min_value = max(reg1->s32_min_value + 1, reg2->s32_min_value);
		} else {
			reg1->smax_value = min(reg1->smax_value, reg2->smax_value - 1);
			reg2->smin_value = max(reg1->smin_value + 1, reg2->smin_value);
		}
		break;
	default:
		return;
	}
}

/* Adjusts the register min/max values in the case that the dst_reg and
 * src_reg are both SCALAR_VALUE registers (or we are simply doing a BPF_K
 * check, in which
case we have a fake SCALAR_VALUE representing insn->imm).
 * Technically we can do similar adjustments for pointers to the same object,
 * but we don't support that right now.
 *
 * The false_* registers are refined with rev_opcode(opcode), the true_*
 * registers with opcode itself; each is then passed through
 * reg_bounds_sync().
 *
 * Returns 0 when nothing is refined (either false-branch register is not a
 * SCALAR_VALUE, or both operands are the same register); otherwise the first
 * non-zero result of reg_bounds_sanity_check() over the four registers, or 0
 * if all of them pass.
 */
static int reg_set_min_max(struct bpf_verifier_env *env,
			   struct bpf_reg_state *true_reg1,
			   struct bpf_reg_state *true_reg2,
			   struct bpf_reg_state *false_reg1,
			   struct bpf_reg_state *false_reg2,
			   u8 opcode, bool is_jmp32)
{
	int err;

	/* If either register is a pointer, we can't learn anything about its
	 * variable offset from the compare (unless they were a pointer into
	 * the same object, but we don't bother with that).
	 */
	if (false_reg1->type != SCALAR_VALUE || false_reg2->type != SCALAR_VALUE)
		return 0;

	/* We compute branch direction for same SCALAR_VALUE registers in
	 * is_scalar_branch_taken(). For unknown branch directions (e.g., BPF_JSET)
	 * on the same registers, we don't need to adjust the min/max values.
	 */
	if (false_reg1 == false_reg2)
		return 0;

	/* fallthrough (FALSE) branch */
	regs_refine_cond_op(false_reg1, false_reg2, rev_opcode(opcode), is_jmp32);
	reg_bounds_sync(false_reg1);
	reg_bounds_sync(false_reg2);

	/* jump (TRUE) branch */
	regs_refine_cond_op(true_reg1, true_reg2, opcode, is_jmp32);
	reg_bounds_sync(true_reg1);
	reg_bounds_sync(true_reg2);

	err = reg_bounds_sanity_check(env, true_reg1, "true_reg1");
	err = err ?: reg_bounds_sanity_check(env, true_reg2, "true_reg2");
	err = err ?: reg_bounds_sanity_check(env, false_reg1, "false_reg1");
	err = err ?: reg_bounds_sanity_check(env, false_reg2, "false_reg2");
	return err;
}

/* Resolve one maybe-NULL pointer register after a NULL check on @id.
 *
 * Acts only if @reg has a may-be-null type and carries @id. With @is_null
 * the register becomes a SCALAR_VALUE and its id and ref_obj_id are cleared;
 * otherwise it is passed to mark_ptr_not_null_reg() and its id is cleared
 * unless reg_may_point_to_spin_lock() says the id is still needed.
 *
 * @state is not referenced in the body.
 */
static void mark_ptr_or_null_reg(struct bpf_func_state *state,
				 struct bpf_reg_state *reg, u32 id,
				 bool is_null)
{
	if (type_may_be_null(reg->type) && reg->id == id &&
	    (is_rcu_reg(reg) || !WARN_ON_ONCE(!reg->id))) {
		/* Old offset (both fixed and variable parts) should have been
		 * known-zero, because we don't allow pointer arithmetic on
		 * pointers that might be NULL. If we see this happening, don't
		 * convert the register.
		 *
		 * But in some cases, some helpers that return local kptrs
		 * advance offset for the returned pointer. In those cases, it
		 * is fine to expect to see reg->off.
		 */
		if (WARN_ON_ONCE(reg->smin_value || reg->smax_value || !tnum_equals_const(reg->var_off, 0)))
			return;
		if (!(type_is_ptr_alloc_obj(reg->type) || type_is_non_owning_ref(reg->type)) &&
		    WARN_ON_ONCE(reg->off))
			return;

		if (is_null) {
			reg->type = SCALAR_VALUE;
			/* We don't need id and ref_obj_id from this point
			 * onwards anymore, thus we should better reset it,
			 * so that state pruning has chances to take effect.
			 */
			reg->id = 0;
			reg->ref_obj_id = 0;

			return;
		}

		mark_ptr_not_null_reg(reg);

		if (!reg_may_point_to_spin_lock(reg)) {
			/* For not-NULL ptr, reg->ref_obj_id will be reset
			 * in release_reference().
			 *
			 * reg->id is still used by spin_lock ptr. Other
			 * than spin_lock ptr type, reg->id can be reset.
			 */
			reg->id = 0;
		}
	}
}

/* The logic is similar to find_good_pkt_pointers(), both could eventually
 * be folded together at some point.
 *
 * Applies mark_ptr_or_null_reg() to every register visited by
 * bpf_for_each_reg_in_vstate(), using the id of register @regno in the
 * current frame.
 */
static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno,
				  bool is_null)
{
	struct bpf_func_state *state = vstate->frame[vstate->curframe];
	struct bpf_reg_state *regs = state->regs, *reg;
	u32 ref_obj_id = regs[regno].ref_obj_id;
	u32 id = regs[regno].id;

	if (ref_obj_id && ref_obj_id == id && is_null)
		/* regs[regno] is in the " == NULL" branch.
		 * No one could have freed the reference state before
		 * doing the NULL check.
		 */
		WARN_ON_ONCE(release_reference_nomark(vstate, id));

	bpf_for_each_reg_in_vstate(vstate, state, reg, ({
		mark_ptr_or_null_reg(state, reg, id, is_null);
	}));
}

/* Recognise 64-bit register-register JGT/JLT/JGE/JLE comparisons between a
 * PTR_TO_PACKET and PTR_TO_PACKET_END, or between a PTR_TO_PACKET_META and an
 * initial PTR_TO_PACKET (per reg_is_init_pkt_pointer()), in either operand
 * order.
 *
 * For a recognised pair, find_good_pkt_pointers() is run on one branch state
 * and mark_pkt_end() on the other, as selected per opcode below.
 * NOTE(review): presumably this_branch is the fall-through state and
 * other_branch the jump-taken state - confirm against the caller.
 *
 * Returns true if the instruction matched one of these patterns, false
 * otherwise (including BPF_K sources, BPF_JMP32 and any other opcode).
 */
static bool try_match_pkt_pointers(const struct bpf_insn *insn,
				   struct bpf_reg_state *dst_reg,
				   struct bpf_reg_state *src_reg,
				   struct bpf_verifier_state *this_branch,
				   struct bpf_verifier_state *other_branch)
{
	if (BPF_SRC(insn->code) != BPF_X)
		return false;

	/* Pointers are always 64-bit. */
	if (BPF_CLASS(insn->code) == BPF_JMP32)
		return false;

	switch (BPF_OP(insn->code)) {
	case BPF_JGT:
		if ((dst_reg->type == PTR_TO_PACKET &&
		     src_reg->type == PTR_TO_PACKET_END) ||
		    (dst_reg->type == PTR_TO_PACKET_META &&
		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
			/* pkt_data' > pkt_end, pkt_meta' > pkt_data */
			find_good_pkt_pointers(this_branch, dst_reg,
					       dst_reg->type, false);
			mark_pkt_end(other_branch, insn->dst_reg, true);
		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
			    src_reg->type == PTR_TO_PACKET) ||
			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
			    src_reg->type == PTR_TO_PACKET_META)) {
			/* pkt_end > pkt_data', pkt_data > pkt_meta' */
			find_good_pkt_pointers(other_branch, src_reg,
					       src_reg->type, true);
			mark_pkt_end(this_branch, insn->src_reg, false);
		} else {
			return false;
		}
		break;
	case BPF_JLT:
		if ((dst_reg->type == PTR_TO_PACKET &&
		     src_reg->type == PTR_TO_PACKET_END) ||
		    (dst_reg->type == PTR_TO_PACKET_META &&
		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
			/* pkt_data' < pkt_end, pkt_meta' < pkt_data */
			find_good_pkt_pointers(other_branch, dst_reg,
					       dst_reg->type, true);
			mark_pkt_end(this_branch, insn->dst_reg, false);
		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
			    src_reg->type == PTR_TO_PACKET) ||
			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
			    src_reg->type == PTR_TO_PACKET_META)) {
			/* pkt_end < pkt_data', pkt_data < pkt_meta' */
			find_good_pkt_pointers(this_branch, src_reg,
					       src_reg->type, false);
			mark_pkt_end(other_branch, insn->src_reg, true);
		} else {
			return false;
		}
		break;
	case BPF_JGE:
		if ((dst_reg->type == PTR_TO_PACKET &&
		     src_reg->type == PTR_TO_PACKET_END) ||
		    (dst_reg->type == PTR_TO_PACKET_META &&
		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
			/* pkt_data' >= pkt_end, pkt_meta' >= pkt_data */
			find_good_pkt_pointers(this_branch, dst_reg,
					       dst_reg->type, true);
			mark_pkt_end(other_branch, insn->dst_reg, false);
		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
			    src_reg->type == PTR_TO_PACKET) ||
			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
			    src_reg->type == PTR_TO_PACKET_META)) {
			/* pkt_end >= pkt_data', pkt_data >= pkt_meta' */
			find_good_pkt_pointers(other_branch, src_reg,
					       src_reg->type, false);
			mark_pkt_end(this_branch, insn->src_reg, true);
		} else {
			return false;
		}
		break;
	case BPF_JLE:
		if ((dst_reg->type == PTR_TO_PACKET &&
		     src_reg->type == PTR_TO_PACKET_END) ||
		    (dst_reg->type == PTR_TO_PACKET_META &&
		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
			/* pkt_data' <= pkt_end, pkt_meta' <= pkt_data */
			find_good_pkt_pointers(other_branch, dst_reg,
					       dst_reg->type, false);
			mark_pkt_end(this_branch, insn->dst_reg, true);
		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
			    src_reg->type == PTR_TO_PACKET) ||
			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
			    src_reg->type == PTR_TO_PACKET_META)) {
			/* pkt_end <= pkt_data', pkt_data <= pkt_meta' */
			find_good_pkt_pointers(this_branch, src_reg,
					       src_reg->type, true);
			mark_pkt_end(other_branch, insn->src_reg, false);
		} else {
			return false;
		}
		break;
	default:
		return false;
	}

	return true;
}

/* Record @reg in @reg_set if it is a SCALAR_VALUE whose id, ignoring the
 * BPF_ADD_CONST bits, equals @id. @spi_or_reg is stored in the entry as is;
 * @is_reg tells later users how to interpret it. If linked_regs_push() hands
 * back no entry, the register's id is reset to 0 instead.
 */
static void __collect_linked_regs(struct linked_regs *reg_set, struct bpf_reg_state *reg,
				  u32 id, u32 frameno, u32 spi_or_reg, bool is_reg)
{
	struct linked_reg *e;

	if (reg->type != SCALAR_VALUE || (reg->id & ~BPF_ADD_CONST) != id)
		return;

	e = linked_regs_push(reg_set);
	if (e) {
		e->frameno = frameno;
		e->is_reg = is_reg;
		e->regno = spi_or_reg;
	} else {
		reg->id = 0;
	}
}

/* For all R being scalar registers or spilled scalar registers
 * in verifier state, save R in linked_regs if R->id == id.
 * If there are too many Rs sharing same id, reset id for leftover Rs.
 *
 * Frames are walked from the current one down to frame 0. Within a frame,
 * only registers below BPF_REG_FP whose bit is set in that frame's
 * live_regs_before mask are considered, followed by every spilled stack slot.
 */
static void collect_linked_regs(struct bpf_verifier_env *env,
				struct bpf_verifier_state *vstate,
				u32 id,
				struct linked_regs *linked_regs)
{
	struct bpf_insn_aux_data *aux = env->insn_aux_data;
	struct bpf_func_state *func;
	struct bpf_reg_state *reg;
	u16 live_regs;
	int i, j;

	id = id & ~BPF_ADD_CONST;
	for (i = vstate->curframe; i >= 0; i--) {
		live_regs = aux[frame_insn_idx(vstate, i)].live_regs_before;
		func = vstate->frame[i];
		for (j = 0; j < BPF_REG_FP; j++) {
			if (!(live_regs & BIT(j)))
				continue;
			reg = &func->regs[j];
			__collect_linked_regs(linked_regs, reg, id, i, j, true);
		}
		for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
			if (!is_spilled_reg(&func->stack[j]))
				continue;
			reg = &func->stack[j].spilled_ptr;
			__collect_linked_regs(linked_regs, reg, id, i, j, false);
		}
	}
}

/* For all R in linked_regs, copy known_reg range into R
 * if R->id == known_reg->id.
 *
 * When either id carries a BPF_ADD_CONST bit and the two 'off' fields
 * differ, R is instead recomputed as known_reg plus the constant
 * (R->off - known_reg->off), keeping R's own off, id and subreg_def.
 * Every updated register or stack slot is marked as scratched.
 */
static void sync_linked_regs(struct bpf_verifier_env *env, struct bpf_verifier_state *vstate,
			     struct bpf_reg_state *known_reg, struct linked_regs *linked_regs)
{
	struct bpf_reg_state fake_reg;
	struct bpf_reg_state *reg;
	struct linked_reg *e;
	int i;

	for (i = 0; i < linked_regs->cnt; ++i) {
		e = &linked_regs->entries[i];
		reg = e->is_reg ? &vstate->frame[e->frameno]->regs[e->regno]
				: &vstate->frame[e->frameno]->stack[e->spi].spilled_ptr;
		if (reg->type != SCALAR_VALUE || reg == known_reg)
			continue;
		if ((reg->id & ~BPF_ADD_CONST) != (known_reg->id & ~BPF_ADD_CONST))
			continue;
		if ((!(reg->id & BPF_ADD_CONST) && !(known_reg->id & BPF_ADD_CONST)) ||
		    reg->off == known_reg->off) {
			s32 saved_subreg_def = reg->subreg_def;

			copy_register_state(reg, known_reg);
			reg->subreg_def = saved_subreg_def;
		} else {
			s32 saved_subreg_def = reg->subreg_def;
			s32 saved_off = reg->off;
			u32 saved_id = reg->id;

			fake_reg.type = SCALAR_VALUE;
			__mark_reg_known(&fake_reg, (s64)reg->off - (s64)known_reg->off);

			/* reg = known_reg; reg += delta */
			copy_register_state(reg, known_reg);
			/*
			 * Must preserve off, id and subreg_def flag,
			 * otherwise another sync_linked_regs() will be incorrect.
			 */
			reg->off = saved_off;
			reg->id = saved_id;
			reg->subreg_def = saved_subreg_def;

			scalar32_min_max_add(reg, &fake_reg);
			scalar_min_max_add(reg, &fake_reg);
			reg->var_off = tnum_add(reg->var_off, fake_reg.var_off);
			if (known_reg->id & BPF_ADD_CONST32)
				zext_32_to_64(reg);
			reg_bounds_sync(reg);
		}
		if (e->is_reg)
			mark_reg_scratched(env, e->regno);
		else
			mark_stack_slot_scratched(env, e->spi);
	}
}

static int check_cond_jmp_op(struct bpf_verifier_env *env,
			     struct bpf_insn *insn, int *insn_idx)
{
	struct bpf_verifier_state *this_branch = env->cur_state;
	struct bpf_verifier_state *other_branch;
	struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs;
	struct bpf_reg_state *dst_reg, *other_branch_regs, *src_reg = NULL;
	struct bpf_reg_state *eq_branch_regs;
	struct linked_regs linked_regs = {};
	u8 opcode = BPF_OP(insn->code);
	int
insn_flags = 0; 17460 bool is_jmp32; 17461 int pred = -1; 17462 int err; 17463 17464 /* Only conditional jumps are expected to reach here. */ 17465 if (opcode == BPF_JA || opcode > BPF_JCOND) { 17466 verbose(env, "invalid BPF_JMP/JMP32 opcode %x\n", opcode); 17467 return -EINVAL; 17468 } 17469 17470 if (opcode == BPF_JCOND) { 17471 struct bpf_verifier_state *cur_st = env->cur_state, *queued_st, *prev_st; 17472 int idx = *insn_idx; 17473 17474 if (insn->code != (BPF_JMP | BPF_JCOND) || 17475 insn->src_reg != BPF_MAY_GOTO || 17476 insn->dst_reg || insn->imm) { 17477 verbose(env, "invalid may_goto imm %d\n", insn->imm); 17478 return -EINVAL; 17479 } 17480 prev_st = find_prev_entry(env, cur_st->parent, idx); 17481 17482 /* branch out 'fallthrough' insn as a new state to explore */ 17483 queued_st = push_stack(env, idx + 1, idx, false); 17484 if (IS_ERR(queued_st)) 17485 return PTR_ERR(queued_st); 17486 17487 queued_st->may_goto_depth++; 17488 if (prev_st) 17489 widen_imprecise_scalars(env, prev_st, queued_st); 17490 *insn_idx += insn->off; 17491 return 0; 17492 } 17493 17494 /* check src2 operand */ 17495 err = check_reg_arg(env, insn->dst_reg, SRC_OP); 17496 if (err) 17497 return err; 17498 17499 dst_reg = ®s[insn->dst_reg]; 17500 if (BPF_SRC(insn->code) == BPF_X) { 17501 if (insn->imm != 0) { 17502 verbose(env, "BPF_JMP/JMP32 uses reserved fields\n"); 17503 return -EINVAL; 17504 } 17505 17506 /* check src1 operand */ 17507 err = check_reg_arg(env, insn->src_reg, SRC_OP); 17508 if (err) 17509 return err; 17510 17511 src_reg = ®s[insn->src_reg]; 17512 if (!(reg_is_pkt_pointer_any(dst_reg) && reg_is_pkt_pointer_any(src_reg)) && 17513 is_pointer_value(env, insn->src_reg)) { 17514 verbose(env, "R%d pointer comparison prohibited\n", 17515 insn->src_reg); 17516 return -EACCES; 17517 } 17518 17519 if (src_reg->type == PTR_TO_STACK) 17520 insn_flags |= INSN_F_SRC_REG_STACK; 17521 if (dst_reg->type == PTR_TO_STACK) 17522 insn_flags |= INSN_F_DST_REG_STACK; 17523 } else { 17524 
if (insn->src_reg != BPF_REG_0) { 17525 verbose(env, "BPF_JMP/JMP32 uses reserved fields\n"); 17526 return -EINVAL; 17527 } 17528 src_reg = &env->fake_reg[0]; 17529 memset(src_reg, 0, sizeof(*src_reg)); 17530 src_reg->type = SCALAR_VALUE; 17531 __mark_reg_known(src_reg, insn->imm); 17532 17533 if (dst_reg->type == PTR_TO_STACK) 17534 insn_flags |= INSN_F_DST_REG_STACK; 17535 } 17536 17537 if (insn_flags) { 17538 err = push_jmp_history(env, this_branch, insn_flags, 0); 17539 if (err) 17540 return err; 17541 } 17542 17543 is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32; 17544 pred = is_branch_taken(dst_reg, src_reg, opcode, is_jmp32); 17545 if (pred >= 0) { 17546 /* If we get here with a dst_reg pointer type it is because 17547 * above is_branch_taken() special cased the 0 comparison. 17548 */ 17549 if (!__is_pointer_value(false, dst_reg)) 17550 err = mark_chain_precision(env, insn->dst_reg); 17551 if (BPF_SRC(insn->code) == BPF_X && !err && 17552 !__is_pointer_value(false, src_reg)) 17553 err = mark_chain_precision(env, insn->src_reg); 17554 if (err) 17555 return err; 17556 } 17557 17558 if (pred == 1) { 17559 /* Only follow the goto, ignore fall-through. If needed, push 17560 * the fall-through branch for simulation under speculative 17561 * execution. 17562 */ 17563 if (!env->bypass_spec_v1) { 17564 err = sanitize_speculative_path(env, insn, *insn_idx + 1, *insn_idx); 17565 if (err < 0) 17566 return err; 17567 } 17568 if (env->log.level & BPF_LOG_LEVEL) 17569 print_insn_state(env, this_branch, this_branch->curframe); 17570 *insn_idx += insn->off; 17571 return 0; 17572 } else if (pred == 0) { 17573 /* Only follow the fall-through branch, since that's where the 17574 * program will go. If needed, push the goto branch for 17575 * simulation under speculative execution. 
17576 */ 17577 if (!env->bypass_spec_v1) { 17578 err = sanitize_speculative_path(env, insn, *insn_idx + insn->off + 1, 17579 *insn_idx); 17580 if (err < 0) 17581 return err; 17582 } 17583 if (env->log.level & BPF_LOG_LEVEL) 17584 print_insn_state(env, this_branch, this_branch->curframe); 17585 return 0; 17586 } 17587 17588 /* Push scalar registers sharing same ID to jump history, 17589 * do this before creating 'other_branch', so that both 17590 * 'this_branch' and 'other_branch' share this history 17591 * if parent state is created. 17592 */ 17593 if (BPF_SRC(insn->code) == BPF_X && src_reg->type == SCALAR_VALUE && src_reg->id) 17594 collect_linked_regs(env, this_branch, src_reg->id, &linked_regs); 17595 if (dst_reg->type == SCALAR_VALUE && dst_reg->id) 17596 collect_linked_regs(env, this_branch, dst_reg->id, &linked_regs); 17597 if (linked_regs.cnt > 1) { 17598 err = push_jmp_history(env, this_branch, 0, linked_regs_pack(&linked_regs)); 17599 if (err) 17600 return err; 17601 } 17602 17603 other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx, false); 17604 if (IS_ERR(other_branch)) 17605 return PTR_ERR(other_branch); 17606 other_branch_regs = other_branch->frame[other_branch->curframe]->regs; 17607 17608 if (BPF_SRC(insn->code) == BPF_X) { 17609 err = reg_set_min_max(env, 17610 &other_branch_regs[insn->dst_reg], 17611 &other_branch_regs[insn->src_reg], 17612 dst_reg, src_reg, opcode, is_jmp32); 17613 } else /* BPF_SRC(insn->code) == BPF_K */ { 17614 /* reg_set_min_max() can mangle the fake_reg. Make a copy 17615 * so that these are two different memory locations. The 17616 * src_reg is not used beyond here in context of K. 
17617 */ 17618 memcpy(&env->fake_reg[1], &env->fake_reg[0], 17619 sizeof(env->fake_reg[0])); 17620 err = reg_set_min_max(env, 17621 &other_branch_regs[insn->dst_reg], 17622 &env->fake_reg[0], 17623 dst_reg, &env->fake_reg[1], 17624 opcode, is_jmp32); 17625 } 17626 if (err) 17627 return err; 17628 17629 if (BPF_SRC(insn->code) == BPF_X && 17630 src_reg->type == SCALAR_VALUE && src_reg->id && 17631 !WARN_ON_ONCE(src_reg->id != other_branch_regs[insn->src_reg].id)) { 17632 sync_linked_regs(env, this_branch, src_reg, &linked_regs); 17633 sync_linked_regs(env, other_branch, &other_branch_regs[insn->src_reg], 17634 &linked_regs); 17635 } 17636 if (dst_reg->type == SCALAR_VALUE && dst_reg->id && 17637 !WARN_ON_ONCE(dst_reg->id != other_branch_regs[insn->dst_reg].id)) { 17638 sync_linked_regs(env, this_branch, dst_reg, &linked_regs); 17639 sync_linked_regs(env, other_branch, &other_branch_regs[insn->dst_reg], 17640 &linked_regs); 17641 } 17642 17643 /* if one pointer register is compared to another pointer 17644 * register check if PTR_MAYBE_NULL could be lifted. 17645 * E.g. register A - maybe null 17646 * register B - not null 17647 * for JNE A, B, ... - A is not null in the false branch; 17648 * for JEQ A, B, ... - A is not null in the true branch. 17649 * 17650 * Since PTR_TO_BTF_ID points to a kernel struct that does 17651 * not need to be null checked by the BPF program, i.e., 17652 * could be null even without PTR_MAYBE_NULL marking, so 17653 * only propagate nullness when neither reg is that type. 
17654 */ 17655 if (!is_jmp32 && BPF_SRC(insn->code) == BPF_X && 17656 __is_pointer_value(false, src_reg) && __is_pointer_value(false, dst_reg) && 17657 type_may_be_null(src_reg->type) != type_may_be_null(dst_reg->type) && 17658 base_type(src_reg->type) != PTR_TO_BTF_ID && 17659 base_type(dst_reg->type) != PTR_TO_BTF_ID) { 17660 eq_branch_regs = NULL; 17661 switch (opcode) { 17662 case BPF_JEQ: 17663 eq_branch_regs = other_branch_regs; 17664 break; 17665 case BPF_JNE: 17666 eq_branch_regs = regs; 17667 break; 17668 default: 17669 /* do nothing */ 17670 break; 17671 } 17672 if (eq_branch_regs) { 17673 if (type_may_be_null(src_reg->type)) 17674 mark_ptr_not_null_reg(&eq_branch_regs[insn->src_reg]); 17675 else 17676 mark_ptr_not_null_reg(&eq_branch_regs[insn->dst_reg]); 17677 } 17678 } 17679 17680 /* detect if R == 0 where R is returned from bpf_map_lookup_elem(). 17681 * NOTE: these optimizations below are related with pointer comparison 17682 * which will never be JMP32. 17683 */ 17684 if (!is_jmp32 && BPF_SRC(insn->code) == BPF_K && 17685 insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) && 17686 type_may_be_null(dst_reg->type)) { 17687 /* Mark all identical registers in each branch as either 17688 * safe or unknown depending R == 0 or R != 0 conditional. 
17689 */ 17690 mark_ptr_or_null_regs(this_branch, insn->dst_reg, 17691 opcode == BPF_JNE); 17692 mark_ptr_or_null_regs(other_branch, insn->dst_reg, 17693 opcode == BPF_JEQ); 17694 } else if (!try_match_pkt_pointers(insn, dst_reg, ®s[insn->src_reg], 17695 this_branch, other_branch) && 17696 is_pointer_value(env, insn->dst_reg)) { 17697 verbose(env, "R%d pointer comparison prohibited\n", 17698 insn->dst_reg); 17699 return -EACCES; 17700 } 17701 if (env->log.level & BPF_LOG_LEVEL) 17702 print_insn_state(env, this_branch, this_branch->curframe); 17703 return 0; 17704 } 17705 17706 /* verify BPF_LD_IMM64 instruction */ 17707 static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn) 17708 { 17709 struct bpf_insn_aux_data *aux = cur_aux(env); 17710 struct bpf_reg_state *regs = cur_regs(env); 17711 struct bpf_reg_state *dst_reg; 17712 struct bpf_map *map; 17713 int err; 17714 17715 if (BPF_SIZE(insn->code) != BPF_DW) { 17716 verbose(env, "invalid BPF_LD_IMM insn\n"); 17717 return -EINVAL; 17718 } 17719 if (insn->off != 0) { 17720 verbose(env, "BPF_LD_IMM64 uses reserved fields\n"); 17721 return -EINVAL; 17722 } 17723 17724 err = check_reg_arg(env, insn->dst_reg, DST_OP); 17725 if (err) 17726 return err; 17727 17728 dst_reg = ®s[insn->dst_reg]; 17729 if (insn->src_reg == 0) { 17730 u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm; 17731 17732 dst_reg->type = SCALAR_VALUE; 17733 __mark_reg_known(®s[insn->dst_reg], imm); 17734 return 0; 17735 } 17736 17737 /* All special src_reg cases are listed below. From this point onwards 17738 * we either succeed and assign a corresponding dst_reg->type after 17739 * zeroing the offset, or fail and reject the program. 
17740 */ 17741 mark_reg_known_zero(env, regs, insn->dst_reg); 17742 17743 if (insn->src_reg == BPF_PSEUDO_BTF_ID) { 17744 dst_reg->type = aux->btf_var.reg_type; 17745 switch (base_type(dst_reg->type)) { 17746 case PTR_TO_MEM: 17747 dst_reg->mem_size = aux->btf_var.mem_size; 17748 break; 17749 case PTR_TO_BTF_ID: 17750 dst_reg->btf = aux->btf_var.btf; 17751 dst_reg->btf_id = aux->btf_var.btf_id; 17752 break; 17753 default: 17754 verifier_bug(env, "pseudo btf id: unexpected dst reg type"); 17755 return -EFAULT; 17756 } 17757 return 0; 17758 } 17759 17760 if (insn->src_reg == BPF_PSEUDO_FUNC) { 17761 struct bpf_prog_aux *aux = env->prog->aux; 17762 u32 subprogno = find_subprog(env, 17763 env->insn_idx + insn->imm + 1); 17764 17765 if (!aux->func_info) { 17766 verbose(env, "missing btf func_info\n"); 17767 return -EINVAL; 17768 } 17769 if (aux->func_info_aux[subprogno].linkage != BTF_FUNC_STATIC) { 17770 verbose(env, "callback function not static\n"); 17771 return -EINVAL; 17772 } 17773 17774 dst_reg->type = PTR_TO_FUNC; 17775 dst_reg->subprogno = subprogno; 17776 return 0; 17777 } 17778 17779 map = env->used_maps[aux->map_index]; 17780 dst_reg->map_ptr = map; 17781 17782 if (insn->src_reg == BPF_PSEUDO_MAP_VALUE || 17783 insn->src_reg == BPF_PSEUDO_MAP_IDX_VALUE) { 17784 if (map->map_type == BPF_MAP_TYPE_ARENA) { 17785 __mark_reg_unknown(env, dst_reg); 17786 return 0; 17787 } 17788 dst_reg->type = PTR_TO_MAP_VALUE; 17789 dst_reg->off = aux->map_off; 17790 WARN_ON_ONCE(map->map_type != BPF_MAP_TYPE_INSN_ARRAY && 17791 map->max_entries != 1); 17792 /* We want reg->id to be same (0) as map_value is not distinct */ 17793 } else if (insn->src_reg == BPF_PSEUDO_MAP_FD || 17794 insn->src_reg == BPF_PSEUDO_MAP_IDX) { 17795 dst_reg->type = CONST_PTR_TO_MAP; 17796 } else { 17797 verifier_bug(env, "unexpected src reg value for ldimm64"); 17798 return -EFAULT; 17799 } 17800 17801 return 0; 17802 } 17803 17804 static bool may_access_skb(enum bpf_prog_type type) 17805 { 17806 
switch (type) { 17807 case BPF_PROG_TYPE_SOCKET_FILTER: 17808 case BPF_PROG_TYPE_SCHED_CLS: 17809 case BPF_PROG_TYPE_SCHED_ACT: 17810 return true; 17811 default: 17812 return false; 17813 } 17814 } 17815 17816 /* verify safety of LD_ABS|LD_IND instructions: 17817 * - they can only appear in the programs where ctx == skb 17818 * - since they are wrappers of function calls, they scratch R1-R5 registers, 17819 * preserve R6-R9, and store return value into R0 17820 * 17821 * Implicit input: 17822 * ctx == skb == R6 == CTX 17823 * 17824 * Explicit input: 17825 * SRC == any register 17826 * IMM == 32-bit immediate 17827 * 17828 * Output: 17829 * R0 - 8/16/32-bit skb data converted to cpu endianness 17830 */ 17831 static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn) 17832 { 17833 struct bpf_reg_state *regs = cur_regs(env); 17834 static const int ctx_reg = BPF_REG_6; 17835 u8 mode = BPF_MODE(insn->code); 17836 int i, err; 17837 17838 if (!may_access_skb(resolve_prog_type(env->prog))) { 17839 verbose(env, "BPF_LD_[ABS|IND] instructions not allowed for this program type\n"); 17840 return -EINVAL; 17841 } 17842 17843 if (!env->ops->gen_ld_abs) { 17844 verifier_bug(env, "gen_ld_abs is null"); 17845 return -EFAULT; 17846 } 17847 17848 if (insn->dst_reg != BPF_REG_0 || insn->off != 0 || 17849 BPF_SIZE(insn->code) == BPF_DW || 17850 (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) { 17851 verbose(env, "BPF_LD_[ABS|IND] uses reserved fields\n"); 17852 return -EINVAL; 17853 } 17854 17855 /* check whether implicit source operand (register R6) is readable */ 17856 err = check_reg_arg(env, ctx_reg, SRC_OP); 17857 if (err) 17858 return err; 17859 17860 /* Disallow usage of BPF_LD_[ABS|IND] with reference tracking, as 17861 * gen_ld_abs() may terminate the program at runtime, leading to 17862 * reference leak. 
17863 */ 17864 err = check_resource_leak(env, false, true, "BPF_LD_[ABS|IND]"); 17865 if (err) 17866 return err; 17867 17868 if (regs[ctx_reg].type != PTR_TO_CTX) { 17869 verbose(env, 17870 "at the time of BPF_LD_ABS|IND R6 != pointer to skb\n"); 17871 return -EINVAL; 17872 } 17873 17874 if (mode == BPF_IND) { 17875 /* check explicit source operand */ 17876 err = check_reg_arg(env, insn->src_reg, SRC_OP); 17877 if (err) 17878 return err; 17879 } 17880 17881 err = check_ptr_off_reg(env, ®s[ctx_reg], ctx_reg); 17882 if (err < 0) 17883 return err; 17884 17885 /* reset caller saved regs to unreadable */ 17886 for (i = 0; i < CALLER_SAVED_REGS; i++) { 17887 mark_reg_not_init(env, regs, caller_saved[i]); 17888 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK); 17889 } 17890 17891 /* mark destination R0 register as readable, since it contains 17892 * the value fetched from the packet. 17893 * Already marked as written above. 17894 */ 17895 mark_reg_unknown(env, regs, BPF_REG_0); 17896 /* ld_abs load up to 32-bit skb data. 
*/ 17897 regs[BPF_REG_0].subreg_def = env->insn_idx + 1; 17898 return 0; 17899 } 17900 17901 static int check_return_code(struct bpf_verifier_env *env, int regno, const char *reg_name) 17902 { 17903 const char *exit_ctx = "At program exit"; 17904 struct tnum enforce_attach_type_range = tnum_unknown; 17905 const struct bpf_prog *prog = env->prog; 17906 struct bpf_reg_state *reg = reg_state(env, regno); 17907 struct bpf_retval_range range = retval_range(0, 1); 17908 enum bpf_prog_type prog_type = resolve_prog_type(env->prog); 17909 int err; 17910 struct bpf_func_state *frame = env->cur_state->frame[0]; 17911 const bool is_subprog = frame->subprogno; 17912 bool return_32bit = false; 17913 const struct btf_type *reg_type, *ret_type = NULL; 17914 17915 /* LSM and struct_ops func-ptr's return type could be "void" */ 17916 if (!is_subprog || frame->in_exception_callback_fn) { 17917 switch (prog_type) { 17918 case BPF_PROG_TYPE_LSM: 17919 if (prog->expected_attach_type == BPF_LSM_CGROUP) 17920 /* See below, can be 0 or 0-1 depending on hook. */ 17921 break; 17922 if (!prog->aux->attach_func_proto->type) 17923 return 0; 17924 break; 17925 case BPF_PROG_TYPE_STRUCT_OPS: 17926 if (!prog->aux->attach_func_proto->type) 17927 return 0; 17928 17929 if (frame->in_exception_callback_fn) 17930 break; 17931 17932 /* Allow a struct_ops program to return a referenced kptr if it 17933 * matches the operator's return type and is in its unmodified 17934 * form. A scalar zero (i.e., a null pointer) is also allowed. 17935 */ 17936 reg_type = reg->btf ? 
btf_type_by_id(reg->btf, reg->btf_id) : NULL; 17937 ret_type = btf_type_resolve_ptr(prog->aux->attach_btf, 17938 prog->aux->attach_func_proto->type, 17939 NULL); 17940 if (ret_type && ret_type == reg_type && reg->ref_obj_id) 17941 return __check_ptr_off_reg(env, reg, regno, false); 17942 break; 17943 default: 17944 break; 17945 } 17946 } 17947 17948 /* eBPF calling convention is such that R0 is used 17949 * to return the value from eBPF program. 17950 * Make sure that it's readable at this time 17951 * of bpf_exit, which means that program wrote 17952 * something into it earlier 17953 */ 17954 err = check_reg_arg(env, regno, SRC_OP); 17955 if (err) 17956 return err; 17957 17958 if (is_pointer_value(env, regno)) { 17959 verbose(env, "R%d leaks addr as return value\n", regno); 17960 return -EACCES; 17961 } 17962 17963 if (frame->in_async_callback_fn) { 17964 exit_ctx = "At async callback return"; 17965 range = frame->callback_ret_range; 17966 goto enforce_retval; 17967 } 17968 17969 if (is_subprog && !frame->in_exception_callback_fn) { 17970 if (reg->type != SCALAR_VALUE) { 17971 verbose(env, "At subprogram exit the register R%d is not a scalar value (%s)\n", 17972 regno, reg_type_str(env, reg->type)); 17973 return -EINVAL; 17974 } 17975 return 0; 17976 } 17977 17978 switch (prog_type) { 17979 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 17980 if (env->prog->expected_attach_type == BPF_CGROUP_UDP4_RECVMSG || 17981 env->prog->expected_attach_type == BPF_CGROUP_UDP6_RECVMSG || 17982 env->prog->expected_attach_type == BPF_CGROUP_UNIX_RECVMSG || 17983 env->prog->expected_attach_type == BPF_CGROUP_INET4_GETPEERNAME || 17984 env->prog->expected_attach_type == BPF_CGROUP_INET6_GETPEERNAME || 17985 env->prog->expected_attach_type == BPF_CGROUP_UNIX_GETPEERNAME || 17986 env->prog->expected_attach_type == BPF_CGROUP_INET4_GETSOCKNAME || 17987 env->prog->expected_attach_type == BPF_CGROUP_INET6_GETSOCKNAME || 17988 env->prog->expected_attach_type == BPF_CGROUP_UNIX_GETSOCKNAME) 17989 
range = retval_range(1, 1); 17990 if (env->prog->expected_attach_type == BPF_CGROUP_INET4_BIND || 17991 env->prog->expected_attach_type == BPF_CGROUP_INET6_BIND) 17992 range = retval_range(0, 3); 17993 break; 17994 case BPF_PROG_TYPE_CGROUP_SKB: 17995 if (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS) { 17996 range = retval_range(0, 3); 17997 enforce_attach_type_range = tnum_range(2, 3); 17998 } 17999 break; 18000 case BPF_PROG_TYPE_CGROUP_SOCK: 18001 case BPF_PROG_TYPE_SOCK_OPS: 18002 case BPF_PROG_TYPE_CGROUP_DEVICE: 18003 case BPF_PROG_TYPE_CGROUP_SYSCTL: 18004 case BPF_PROG_TYPE_CGROUP_SOCKOPT: 18005 break; 18006 case BPF_PROG_TYPE_RAW_TRACEPOINT: 18007 if (!env->prog->aux->attach_btf_id) 18008 return 0; 18009 range = retval_range(0, 0); 18010 break; 18011 case BPF_PROG_TYPE_TRACING: 18012 switch (env->prog->expected_attach_type) { 18013 case BPF_TRACE_FENTRY: 18014 case BPF_TRACE_FEXIT: 18015 case BPF_TRACE_FSESSION: 18016 range = retval_range(0, 0); 18017 break; 18018 case BPF_TRACE_RAW_TP: 18019 case BPF_MODIFY_RETURN: 18020 return 0; 18021 case BPF_TRACE_ITER: 18022 break; 18023 default: 18024 return -ENOTSUPP; 18025 } 18026 break; 18027 case BPF_PROG_TYPE_KPROBE: 18028 switch (env->prog->expected_attach_type) { 18029 case BPF_TRACE_KPROBE_SESSION: 18030 case BPF_TRACE_UPROBE_SESSION: 18031 range = retval_range(0, 1); 18032 break; 18033 default: 18034 return 0; 18035 } 18036 break; 18037 case BPF_PROG_TYPE_SK_LOOKUP: 18038 range = retval_range(SK_DROP, SK_PASS); 18039 break; 18040 18041 case BPF_PROG_TYPE_LSM: 18042 if (env->prog->expected_attach_type != BPF_LSM_CGROUP) { 18043 /* no range found, any return value is allowed */ 18044 if (!get_func_retval_range(env->prog, &range)) 18045 return 0; 18046 /* no restricted range, any return value is allowed */ 18047 if (range.minval == S32_MIN && range.maxval == S32_MAX) 18048 return 0; 18049 return_32bit = true; 18050 } else if (!env->prog->aux->attach_func_proto->type) { 18051 /* Make sure programs 
that attach to void 18052 * hooks don't try to modify return value. 18053 */ 18054 range = retval_range(1, 1); 18055 } 18056 break; 18057 18058 case BPF_PROG_TYPE_NETFILTER: 18059 range = retval_range(NF_DROP, NF_ACCEPT); 18060 break; 18061 case BPF_PROG_TYPE_STRUCT_OPS: 18062 if (!ret_type) 18063 return 0; 18064 range = retval_range(0, 0); 18065 break; 18066 case BPF_PROG_TYPE_EXT: 18067 /* freplace program can return anything as its return value 18068 * depends on the to-be-replaced kernel func or bpf program. 18069 */ 18070 default: 18071 return 0; 18072 } 18073 18074 enforce_retval: 18075 if (reg->type != SCALAR_VALUE) { 18076 verbose(env, "%s the register R%d is not a known value (%s)\n", 18077 exit_ctx, regno, reg_type_str(env, reg->type)); 18078 return -EINVAL; 18079 } 18080 18081 err = mark_chain_precision(env, regno); 18082 if (err) 18083 return err; 18084 18085 if (!retval_range_within(range, reg, return_32bit)) { 18086 verbose_invalid_scalar(env, reg, range, exit_ctx, reg_name); 18087 if (!is_subprog && 18088 prog->expected_attach_type == BPF_LSM_CGROUP && 18089 prog_type == BPF_PROG_TYPE_LSM && 18090 !prog->aux->attach_func_proto->type) 18091 verbose(env, "Note, BPF_LSM_CGROUP that attach to void LSM hooks can't modify return value!\n"); 18092 return -EINVAL; 18093 } 18094 18095 if (!tnum_is_unknown(enforce_attach_type_range) && 18096 tnum_in(enforce_attach_type_range, reg->var_off)) 18097 env->prog->enforce_expected_attach_type = 1; 18098 return 0; 18099 } 18100 18101 static void mark_subprog_changes_pkt_data(struct bpf_verifier_env *env, int off) 18102 { 18103 struct bpf_subprog_info *subprog; 18104 18105 subprog = bpf_find_containing_subprog(env, off); 18106 subprog->changes_pkt_data = true; 18107 } 18108 18109 static void mark_subprog_might_sleep(struct bpf_verifier_env *env, int off) 18110 { 18111 struct bpf_subprog_info *subprog; 18112 18113 subprog = bpf_find_containing_subprog(env, off); 18114 subprog->might_sleep = true; 18115 } 18116 18117 /* 
't' is an index of a call-site. 18118 * 'w' is a callee entry point. 18119 * Eventually this function would be called when env->cfg.insn_state[w] == EXPLORED. 18120 * Rely on DFS traversal order and absence of recursive calls to guarantee that 18121 * callee's change_pkt_data marks would be correct at that moment. 18122 */ 18123 static void merge_callee_effects(struct bpf_verifier_env *env, int t, int w) 18124 { 18125 struct bpf_subprog_info *caller, *callee; 18126 18127 caller = bpf_find_containing_subprog(env, t); 18128 callee = bpf_find_containing_subprog(env, w); 18129 caller->changes_pkt_data |= callee->changes_pkt_data; 18130 caller->might_sleep |= callee->might_sleep; 18131 } 18132 18133 /* non-recursive DFS pseudo code 18134 * 1 procedure DFS-iterative(G,v): 18135 * 2 label v as discovered 18136 * 3 let S be a stack 18137 * 4 S.push(v) 18138 * 5 while S is not empty 18139 * 6 t <- S.peek() 18140 * 7 if t is what we're looking for: 18141 * 8 return t 18142 * 9 for all edges e in G.adjacentEdges(t) do 18143 * 10 if edge e is already labelled 18144 * 11 continue with the next edge 18145 * 12 w <- G.adjacentVertex(t,e) 18146 * 13 if vertex w is not discovered and not explored 18147 * 14 label e as tree-edge 18148 * 15 label w as discovered 18149 * 16 S.push(w) 18150 * 17 continue at 5 18151 * 18 else if vertex w is discovered 18152 * 19 label e as back-edge 18153 * 20 else 18154 * 21 // vertex w is explored 18155 * 22 label e as forward- or cross-edge 18156 * 23 label t as explored 18157 * 24 S.pop() 18158 * 18159 * convention: 18160 * 0x10 - discovered 18161 * 0x11 - discovered and fall-through edge labelled 18162 * 0x12 - discovered and fall-through and branch edges labelled 18163 * 0x20 - explored 18164 */ 18165 18166 enum { 18167 DISCOVERED = 0x10, 18168 EXPLORED = 0x20, 18169 FALLTHROUGH = 1, 18170 BRANCH = 2, 18171 }; 18172 18173 static void mark_prune_point(struct bpf_verifier_env *env, int idx) 18174 { 18175 env->insn_aux_data[idx].prune_point = true; 
18176 } 18177 18178 static bool is_prune_point(struct bpf_verifier_env *env, int insn_idx) 18179 { 18180 return env->insn_aux_data[insn_idx].prune_point; 18181 } 18182 18183 static void mark_force_checkpoint(struct bpf_verifier_env *env, int idx) 18184 { 18185 env->insn_aux_data[idx].force_checkpoint = true; 18186 } 18187 18188 static bool is_force_checkpoint(struct bpf_verifier_env *env, int insn_idx) 18189 { 18190 return env->insn_aux_data[insn_idx].force_checkpoint; 18191 } 18192 18193 static void mark_calls_callback(struct bpf_verifier_env *env, int idx) 18194 { 18195 env->insn_aux_data[idx].calls_callback = true; 18196 } 18197 18198 bool bpf_calls_callback(struct bpf_verifier_env *env, int insn_idx) 18199 { 18200 return env->insn_aux_data[insn_idx].calls_callback; 18201 } 18202 18203 enum { 18204 DONE_EXPLORING = 0, 18205 KEEP_EXPLORING = 1, 18206 }; 18207 18208 /* t, w, e - match pseudo-code above: 18209 * t - index of current instruction 18210 * w - next instruction 18211 * e - edge 18212 */ 18213 static int push_insn(int t, int w, int e, struct bpf_verifier_env *env) 18214 { 18215 int *insn_stack = env->cfg.insn_stack; 18216 int *insn_state = env->cfg.insn_state; 18217 18218 if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH)) 18219 return DONE_EXPLORING; 18220 18221 if (e == BRANCH && insn_state[t] >= (DISCOVERED | BRANCH)) 18222 return DONE_EXPLORING; 18223 18224 if (w < 0 || w >= env->prog->len) { 18225 verbose_linfo(env, t, "%d: ", t); 18226 verbose(env, "jump out of range from insn %d to %d\n", t, w); 18227 return -EINVAL; 18228 } 18229 18230 if (e == BRANCH) { 18231 /* mark branch target for state pruning */ 18232 mark_prune_point(env, w); 18233 mark_jmp_point(env, w); 18234 } 18235 18236 if (insn_state[w] == 0) { 18237 /* tree-edge */ 18238 insn_state[t] = DISCOVERED | e; 18239 insn_state[w] = DISCOVERED; 18240 if (env->cfg.cur_stack >= env->prog->len) 18241 return -E2BIG; 18242 insn_stack[env->cfg.cur_stack++] = w; 18243 return 
KEEP_EXPLORING; 18244 } else if ((insn_state[w] & 0xF0) == DISCOVERED) { 18245 if (env->bpf_capable) 18246 return DONE_EXPLORING; 18247 verbose_linfo(env, t, "%d: ", t); 18248 verbose_linfo(env, w, "%d: ", w); 18249 verbose(env, "back-edge from insn %d to %d\n", t, w); 18250 return -EINVAL; 18251 } else if (insn_state[w] == EXPLORED) { 18252 /* forward- or cross-edge */ 18253 insn_state[t] = DISCOVERED | e; 18254 } else { 18255 verifier_bug(env, "insn state internal bug"); 18256 return -EFAULT; 18257 } 18258 return DONE_EXPLORING; 18259 } 18260 18261 static int visit_func_call_insn(int t, struct bpf_insn *insns, 18262 struct bpf_verifier_env *env, 18263 bool visit_callee) 18264 { 18265 int ret, insn_sz; 18266 int w; 18267 18268 insn_sz = bpf_is_ldimm64(&insns[t]) ? 2 : 1; 18269 ret = push_insn(t, t + insn_sz, FALLTHROUGH, env); 18270 if (ret) 18271 return ret; 18272 18273 mark_prune_point(env, t + insn_sz); 18274 /* when we exit from subprog, we need to record non-linear history */ 18275 mark_jmp_point(env, t + insn_sz); 18276 18277 if (visit_callee) { 18278 w = t + insns[t].imm + 1; 18279 mark_prune_point(env, t); 18280 merge_callee_effects(env, t, w); 18281 ret = push_insn(t, w, BRANCH, env); 18282 } 18283 return ret; 18284 } 18285 18286 /* Bitmask with 1s for all caller saved registers */ 18287 #define ALL_CALLER_SAVED_REGS ((1u << CALLER_SAVED_REGS) - 1) 18288 18289 /* True if do_misc_fixups() replaces calls to helper number 'imm', 18290 * replacement patch is presumed to follow bpf_fastcall contract 18291 * (see mark_fastcall_pattern_for_call() below). 
18292 */ 18293 static bool verifier_inlines_helper_call(struct bpf_verifier_env *env, s32 imm) 18294 { 18295 switch (imm) { 18296 #ifdef CONFIG_X86_64 18297 case BPF_FUNC_get_smp_processor_id: 18298 #ifdef CONFIG_SMP 18299 case BPF_FUNC_get_current_task_btf: 18300 case BPF_FUNC_get_current_task: 18301 #endif 18302 return env->prog->jit_requested && bpf_jit_supports_percpu_insn(); 18303 #endif 18304 default: 18305 return false; 18306 } 18307 } 18308 18309 struct call_summary { 18310 u8 num_params; 18311 bool is_void; 18312 bool fastcall; 18313 }; 18314 18315 /* If @call is a kfunc or helper call, fills @cs and returns true, 18316 * otherwise returns false. 18317 */ 18318 static bool get_call_summary(struct bpf_verifier_env *env, struct bpf_insn *call, 18319 struct call_summary *cs) 18320 { 18321 struct bpf_kfunc_call_arg_meta meta; 18322 const struct bpf_func_proto *fn; 18323 int i; 18324 18325 if (bpf_helper_call(call)) { 18326 18327 if (get_helper_proto(env, call->imm, &fn) < 0) 18328 /* error would be reported later */ 18329 return false; 18330 cs->fastcall = fn->allow_fastcall && 18331 (verifier_inlines_helper_call(env, call->imm) || 18332 bpf_jit_inlines_helper_call(call->imm)); 18333 cs->is_void = fn->ret_type == RET_VOID; 18334 cs->num_params = 0; 18335 for (i = 0; i < ARRAY_SIZE(fn->arg_type); ++i) { 18336 if (fn->arg_type[i] == ARG_DONTCARE) 18337 break; 18338 cs->num_params++; 18339 } 18340 return true; 18341 } 18342 18343 if (bpf_pseudo_kfunc_call(call)) { 18344 int err; 18345 18346 err = fetch_kfunc_arg_meta(env, call->imm, call->off, &meta); 18347 if (err < 0) 18348 /* error would be reported later */ 18349 return false; 18350 cs->num_params = btf_type_vlen(meta.func_proto); 18351 cs->fastcall = meta.kfunc_flags & KF_FASTCALL; 18352 cs->is_void = btf_type_is_void(btf_type_by_id(meta.btf, meta.func_proto->type)); 18353 return true; 18354 } 18355 18356 return false; 18357 } 18358 18359 /* LLVM define a bpf_fastcall function attribute. 
18360 * This attribute means that function scratches only some of 18361 * the caller saved registers defined by ABI. 18362 * For BPF the set of such registers could be defined as follows: 18363 * - R0 is scratched only if function is non-void; 18364 * - R1-R5 are scratched only if corresponding parameter type is defined 18365 * in the function prototype. 18366 * 18367 * The contract between kernel and clang allows to simultaneously use 18368 * such functions and maintain backwards compatibility with old 18369 * kernels that don't understand bpf_fastcall calls: 18370 * 18371 * - for bpf_fastcall calls clang allocates registers as-if relevant r0-r5 18372 * registers are not scratched by the call; 18373 * 18374 * - as a post-processing step, clang visits each bpf_fastcall call and adds 18375 * spill/fill for every live r0-r5; 18376 * 18377 * - stack offsets used for the spill/fill are allocated as lowest 18378 * stack offsets in whole function and are not used for any other 18379 * purposes; 18380 * 18381 * - when kernel loads a program, it looks for such patterns 18382 * (bpf_fastcall function surrounded by spills/fills) and checks if 18383 * spill/fill stack offsets are used exclusively in fastcall patterns; 18384 * 18385 * - if so, and if verifier or current JIT inlines the call to the 18386 * bpf_fastcall function (e.g. a helper call), kernel removes unnecessary 18387 * spill/fill pairs; 18388 * 18389 * - when old kernel loads a program, presence of spill/fill pairs 18390 * keeps BPF program valid, albeit slightly less efficient. 
18391 * 18392 * For example: 18393 * 18394 * r1 = 1; 18395 * r2 = 2; 18396 * *(u64 *)(r10 - 8) = r1; r1 = 1; 18397 * *(u64 *)(r10 - 16) = r2; r2 = 2; 18398 * call %[to_be_inlined] --> call %[to_be_inlined] 18399 * r2 = *(u64 *)(r10 - 16); r0 = r1; 18400 * r1 = *(u64 *)(r10 - 8); r0 += r2; 18401 * r0 = r1; exit; 18402 * r0 += r2; 18403 * exit; 18404 * 18405 * The purpose of mark_fastcall_pattern_for_call is to: 18406 * - look for such patterns; 18407 * - mark spill and fill instructions in env->insn_aux_data[*].fastcall_pattern; 18408 * - mark set env->insn_aux_data[*].fastcall_spills_num for call instruction; 18409 * - update env->subprog_info[*]->fastcall_stack_off to find an offset 18410 * at which bpf_fastcall spill/fill stack slots start; 18411 * - update env->subprog_info[*]->keep_fastcall_stack. 18412 * 18413 * The .fastcall_pattern and .fastcall_stack_off are used by 18414 * check_fastcall_stack_contract() to check if every stack access to 18415 * fastcall spill/fill stack slot originates from spill/fill 18416 * instructions, members of fastcall patterns. 18417 * 18418 * If such condition holds true for a subprogram, fastcall patterns could 18419 * be rewritten by remove_fastcall_spills_fills(). 18420 * Otherwise bpf_fastcall patterns are not changed in the subprogram 18421 * (code, presumably, generated by an older clang version). 18422 * 18423 * For example, it is *not* safe to remove spill/fill below: 18424 * 18425 * r1 = 1; 18426 * *(u64 *)(r10 - 8) = r1; r1 = 1; 18427 * call %[to_be_inlined] --> call %[to_be_inlined] 18428 * r1 = *(u64 *)(r10 - 8); r0 = *(u64 *)(r10 - 8); <---- wrong !!! 
18429 * r0 = *(u64 *)(r10 - 8); r0 += r1; 18430 * r0 += r1; exit; 18431 * exit; 18432 */ 18433 static void mark_fastcall_pattern_for_call(struct bpf_verifier_env *env, 18434 struct bpf_subprog_info *subprog, 18435 int insn_idx, s16 lowest_off) 18436 { 18437 struct bpf_insn *insns = env->prog->insnsi, *stx, *ldx; 18438 struct bpf_insn *call = &env->prog->insnsi[insn_idx]; 18439 u32 clobbered_regs_mask; 18440 struct call_summary cs; 18441 u32 expected_regs_mask; 18442 s16 off; 18443 int i; 18444 18445 if (!get_call_summary(env, call, &cs)) 18446 return; 18447 18448 /* A bitmask specifying which caller saved registers are clobbered 18449 * by a call to a helper/kfunc *as if* this helper/kfunc follows 18450 * bpf_fastcall contract: 18451 * - includes R0 if function is non-void; 18452 * - includes R1-R5 if corresponding parameter has is described 18453 * in the function prototype. 18454 */ 18455 clobbered_regs_mask = GENMASK(cs.num_params, cs.is_void ? 1 : 0); 18456 /* e.g. if helper call clobbers r{0,1}, expect r{2,3,4,5} in the pattern */ 18457 expected_regs_mask = ~clobbered_regs_mask & ALL_CALLER_SAVED_REGS; 18458 18459 /* match pairs of form: 18460 * 18461 * *(u64 *)(r10 - Y) = rX (where Y % 8 == 0) 18462 * ... 18463 * call %[to_be_inlined] 18464 * ... 
18465 * rX = *(u64 *)(r10 - Y) 18466 */ 18467 for (i = 1, off = lowest_off; i <= ARRAY_SIZE(caller_saved); ++i, off += BPF_REG_SIZE) { 18468 if (insn_idx - i < 0 || insn_idx + i >= env->prog->len) 18469 break; 18470 stx = &insns[insn_idx - i]; 18471 ldx = &insns[insn_idx + i]; 18472 /* must be a stack spill/fill pair */ 18473 if (stx->code != (BPF_STX | BPF_MEM | BPF_DW) || 18474 ldx->code != (BPF_LDX | BPF_MEM | BPF_DW) || 18475 stx->dst_reg != BPF_REG_10 || 18476 ldx->src_reg != BPF_REG_10) 18477 break; 18478 /* must be a spill/fill for the same reg */ 18479 if (stx->src_reg != ldx->dst_reg) 18480 break; 18481 /* must be one of the previously unseen registers */ 18482 if ((BIT(stx->src_reg) & expected_regs_mask) == 0) 18483 break; 18484 /* must be a spill/fill for the same expected offset, 18485 * no need to check offset alignment, BPF_DW stack access 18486 * is always 8-byte aligned. 18487 */ 18488 if (stx->off != off || ldx->off != off) 18489 break; 18490 expected_regs_mask &= ~BIT(stx->src_reg); 18491 env->insn_aux_data[insn_idx - i].fastcall_pattern = 1; 18492 env->insn_aux_data[insn_idx + i].fastcall_pattern = 1; 18493 } 18494 if (i == 1) 18495 return; 18496 18497 /* Conditionally set 'fastcall_spills_num' to allow forward 18498 * compatibility when more helper functions are marked as 18499 * bpf_fastcall at compile time than current kernel supports, e.g: 18500 * 18501 * 1: *(u64 *)(r10 - 8) = r1 18502 * 2: call A ;; assume A is bpf_fastcall for current kernel 18503 * 3: r1 = *(u64 *)(r10 - 8) 18504 * 4: *(u64 *)(r10 - 8) = r1 18505 * 5: call B ;; assume B is not bpf_fastcall for current kernel 18506 * 6: r1 = *(u64 *)(r10 - 8) 18507 * 18508 * There is no need to block bpf_fastcall rewrite for such program. 18509 * Set 'fastcall_pattern' for both calls to keep check_fastcall_stack_contract() happy, 18510 * don't set 'fastcall_spills_num' for call B so that remove_fastcall_spills_fills() 18511 * does not remove spill/fill pair {4,6}. 
18512 */ 18513 if (cs.fastcall) 18514 env->insn_aux_data[insn_idx].fastcall_spills_num = i - 1; 18515 else 18516 subprog->keep_fastcall_stack = 1; 18517 subprog->fastcall_stack_off = min(subprog->fastcall_stack_off, off); 18518 } 18519 18520 static int mark_fastcall_patterns(struct bpf_verifier_env *env) 18521 { 18522 struct bpf_subprog_info *subprog = env->subprog_info; 18523 struct bpf_insn *insn; 18524 s16 lowest_off; 18525 int s, i; 18526 18527 for (s = 0; s < env->subprog_cnt; ++s, ++subprog) { 18528 /* find lowest stack spill offset used in this subprog */ 18529 lowest_off = 0; 18530 for (i = subprog->start; i < (subprog + 1)->start; ++i) { 18531 insn = env->prog->insnsi + i; 18532 if (insn->code != (BPF_STX | BPF_MEM | BPF_DW) || 18533 insn->dst_reg != BPF_REG_10) 18534 continue; 18535 lowest_off = min(lowest_off, insn->off); 18536 } 18537 /* use this offset to find fastcall patterns */ 18538 for (i = subprog->start; i < (subprog + 1)->start; ++i) { 18539 insn = env->prog->insnsi + i; 18540 if (insn->code != (BPF_JMP | BPF_CALL)) 18541 continue; 18542 mark_fastcall_pattern_for_call(env, subprog, i, lowest_off); 18543 } 18544 } 18545 return 0; 18546 } 18547 18548 static struct bpf_iarray *iarray_realloc(struct bpf_iarray *old, size_t n_elem) 18549 { 18550 size_t new_size = sizeof(struct bpf_iarray) + n_elem * sizeof(old->items[0]); 18551 struct bpf_iarray *new; 18552 18553 new = kvrealloc(old, new_size, GFP_KERNEL_ACCOUNT); 18554 if (!new) { 18555 /* this is what callers always want, so simplify the call site */ 18556 kvfree(old); 18557 return NULL; 18558 } 18559 18560 new->cnt = n_elem; 18561 return new; 18562 } 18563 18564 static int copy_insn_array(struct bpf_map *map, u32 start, u32 end, u32 *items) 18565 { 18566 struct bpf_insn_array_value *value; 18567 u32 i; 18568 18569 for (i = start; i <= end; i++) { 18570 value = map->ops->map_lookup_elem(map, &i); 18571 /* 18572 * map_lookup_elem of an array map will never return an error, 18573 * but not checking 
it makes some static analysers to worry 18574 */ 18575 if (IS_ERR(value)) 18576 return PTR_ERR(value); 18577 else if (!value) 18578 return -EINVAL; 18579 items[i - start] = value->xlated_off; 18580 } 18581 return 0; 18582 } 18583 18584 static int cmp_ptr_to_u32(const void *a, const void *b) 18585 { 18586 return *(u32 *)a - *(u32 *)b; 18587 } 18588 18589 static int sort_insn_array_uniq(u32 *items, int cnt) 18590 { 18591 int unique = 1; 18592 int i; 18593 18594 sort(items, cnt, sizeof(items[0]), cmp_ptr_to_u32, NULL); 18595 18596 for (i = 1; i < cnt; i++) 18597 if (items[i] != items[unique - 1]) 18598 items[unique++] = items[i]; 18599 18600 return unique; 18601 } 18602 18603 /* 18604 * sort_unique({map[start], ..., map[end]}) into off 18605 */ 18606 static int copy_insn_array_uniq(struct bpf_map *map, u32 start, u32 end, u32 *off) 18607 { 18608 u32 n = end - start + 1; 18609 int err; 18610 18611 err = copy_insn_array(map, start, end, off); 18612 if (err) 18613 return err; 18614 18615 return sort_insn_array_uniq(off, n); 18616 } 18617 18618 /* 18619 * Copy all unique offsets from the map 18620 */ 18621 static struct bpf_iarray *jt_from_map(struct bpf_map *map) 18622 { 18623 struct bpf_iarray *jt; 18624 int err; 18625 int n; 18626 18627 jt = iarray_realloc(NULL, map->max_entries); 18628 if (!jt) 18629 return ERR_PTR(-ENOMEM); 18630 18631 n = copy_insn_array_uniq(map, 0, map->max_entries - 1, jt->items); 18632 if (n < 0) { 18633 err = n; 18634 goto err_free; 18635 } 18636 if (n == 0) { 18637 err = -EINVAL; 18638 goto err_free; 18639 } 18640 jt->cnt = n; 18641 return jt; 18642 18643 err_free: 18644 kvfree(jt); 18645 return ERR_PTR(err); 18646 } 18647 18648 /* 18649 * Find and collect all maps which fit in the subprog. 
Return the result as one 18650 * combined jump table in jt->items (allocated with kvcalloc) 18651 */ 18652 static struct bpf_iarray *jt_from_subprog(struct bpf_verifier_env *env, 18653 int subprog_start, int subprog_end) 18654 { 18655 struct bpf_iarray *jt = NULL; 18656 struct bpf_map *map; 18657 struct bpf_iarray *jt_cur; 18658 int i; 18659 18660 for (i = 0; i < env->insn_array_map_cnt; i++) { 18661 /* 18662 * TODO (when needed): collect only jump tables, not static keys 18663 * or maps for indirect calls 18664 */ 18665 map = env->insn_array_maps[i]; 18666 18667 jt_cur = jt_from_map(map); 18668 if (IS_ERR(jt_cur)) { 18669 kvfree(jt); 18670 return jt_cur; 18671 } 18672 18673 /* 18674 * This is enough to check one element. The full table is 18675 * checked to fit inside the subprog later in create_jt() 18676 */ 18677 if (jt_cur->items[0] >= subprog_start && jt_cur->items[0] < subprog_end) { 18678 u32 old_cnt = jt ? jt->cnt : 0; 18679 jt = iarray_realloc(jt, old_cnt + jt_cur->cnt); 18680 if (!jt) { 18681 kvfree(jt_cur); 18682 return ERR_PTR(-ENOMEM); 18683 } 18684 memcpy(jt->items + old_cnt, jt_cur->items, jt_cur->cnt << 2); 18685 } 18686 18687 kvfree(jt_cur); 18688 } 18689 18690 if (!jt) { 18691 verbose(env, "no jump tables found for subprog starting at %u\n", subprog_start); 18692 return ERR_PTR(-EINVAL); 18693 } 18694 18695 jt->cnt = sort_insn_array_uniq(jt->items, jt->cnt); 18696 return jt; 18697 } 18698 18699 static struct bpf_iarray * 18700 create_jt(int t, struct bpf_verifier_env *env) 18701 { 18702 static struct bpf_subprog_info *subprog; 18703 int subprog_start, subprog_end; 18704 struct bpf_iarray *jt; 18705 int i; 18706 18707 subprog = bpf_find_containing_subprog(env, t); 18708 subprog_start = subprog->start; 18709 subprog_end = (subprog + 1)->start; 18710 jt = jt_from_subprog(env, subprog_start, subprog_end); 18711 if (IS_ERR(jt)) 18712 return jt; 18713 18714 /* Check that the every element of the jump table fits within the given subprogram */ 18715 for 
(i = 0; i < jt->cnt; i++) { 18716 if (jt->items[i] < subprog_start || jt->items[i] >= subprog_end) { 18717 verbose(env, "jump table for insn %d points outside of the subprog [%u,%u]\n", 18718 t, subprog_start, subprog_end); 18719 kvfree(jt); 18720 return ERR_PTR(-EINVAL); 18721 } 18722 } 18723 18724 return jt; 18725 } 18726 18727 /* "conditional jump with N edges" */ 18728 static int visit_gotox_insn(int t, struct bpf_verifier_env *env) 18729 { 18730 int *insn_stack = env->cfg.insn_stack; 18731 int *insn_state = env->cfg.insn_state; 18732 bool keep_exploring = false; 18733 struct bpf_iarray *jt; 18734 int i, w; 18735 18736 jt = env->insn_aux_data[t].jt; 18737 if (!jt) { 18738 jt = create_jt(t, env); 18739 if (IS_ERR(jt)) 18740 return PTR_ERR(jt); 18741 18742 env->insn_aux_data[t].jt = jt; 18743 } 18744 18745 mark_prune_point(env, t); 18746 for (i = 0; i < jt->cnt; i++) { 18747 w = jt->items[i]; 18748 if (w < 0 || w >= env->prog->len) { 18749 verbose(env, "indirect jump out of range from insn %d to %d\n", t, w); 18750 return -EINVAL; 18751 } 18752 18753 mark_jmp_point(env, w); 18754 18755 /* EXPLORED || DISCOVERED */ 18756 if (insn_state[w]) 18757 continue; 18758 18759 if (env->cfg.cur_stack >= env->prog->len) 18760 return -E2BIG; 18761 18762 insn_stack[env->cfg.cur_stack++] = w; 18763 insn_state[w] |= DISCOVERED; 18764 keep_exploring = true; 18765 } 18766 18767 return keep_exploring ? 
KEEP_EXPLORING : DONE_EXPLORING; 18768 } 18769 18770 static int visit_tailcall_insn(struct bpf_verifier_env *env, int t) 18771 { 18772 static struct bpf_subprog_info *subprog; 18773 struct bpf_iarray *jt; 18774 18775 if (env->insn_aux_data[t].jt) 18776 return 0; 18777 18778 jt = iarray_realloc(NULL, 2); 18779 if (!jt) 18780 return -ENOMEM; 18781 18782 subprog = bpf_find_containing_subprog(env, t); 18783 jt->items[0] = t + 1; 18784 jt->items[1] = subprog->exit_idx; 18785 env->insn_aux_data[t].jt = jt; 18786 return 0; 18787 } 18788 18789 /* Visits the instruction at index t and returns one of the following: 18790 * < 0 - an error occurred 18791 * DONE_EXPLORING - the instruction was fully explored 18792 * KEEP_EXPLORING - there is still work to be done before it is fully explored 18793 */ 18794 static int visit_insn(int t, struct bpf_verifier_env *env) 18795 { 18796 struct bpf_insn *insns = env->prog->insnsi, *insn = &insns[t]; 18797 int ret, off, insn_sz; 18798 18799 if (bpf_pseudo_func(insn)) 18800 return visit_func_call_insn(t, insns, env, true); 18801 18802 /* All non-branch instructions have a single fall-through edge. */ 18803 if (BPF_CLASS(insn->code) != BPF_JMP && 18804 BPF_CLASS(insn->code) != BPF_JMP32) { 18805 insn_sz = bpf_is_ldimm64(insn) ? 2 : 1; 18806 return push_insn(t, t + insn_sz, FALLTHROUGH, env); 18807 } 18808 18809 switch (BPF_OP(insn->code)) { 18810 case BPF_EXIT: 18811 return DONE_EXPLORING; 18812 18813 case BPF_CALL: 18814 if (is_async_callback_calling_insn(insn)) 18815 /* Mark this call insn as a prune point to trigger 18816 * is_state_visited() check before call itself is 18817 * processed by __check_func_call(). Otherwise new 18818 * async state will be pushed for further exploration. 18819 */ 18820 mark_prune_point(env, t); 18821 /* For functions that invoke callbacks it is not known how many times 18822 * callback would be called. 
Verifier models callback calling functions 18823 * by repeatedly visiting callback bodies and returning to origin call 18824 * instruction. 18825 * In order to stop such iteration verifier needs to identify when a 18826 * state identical some state from a previous iteration is reached. 18827 * Check below forces creation of checkpoint before callback calling 18828 * instruction to allow search for such identical states. 18829 */ 18830 if (is_sync_callback_calling_insn(insn)) { 18831 mark_calls_callback(env, t); 18832 mark_force_checkpoint(env, t); 18833 mark_prune_point(env, t); 18834 mark_jmp_point(env, t); 18835 } 18836 if (bpf_helper_call(insn)) { 18837 const struct bpf_func_proto *fp; 18838 18839 ret = get_helper_proto(env, insn->imm, &fp); 18840 /* If called in a non-sleepable context program will be 18841 * rejected anyway, so we should end up with precise 18842 * sleepable marks on subprogs, except for dead code 18843 * elimination. 18844 */ 18845 if (ret == 0 && fp->might_sleep) 18846 mark_subprog_might_sleep(env, t); 18847 if (bpf_helper_changes_pkt_data(insn->imm)) 18848 mark_subprog_changes_pkt_data(env, t); 18849 if (insn->imm == BPF_FUNC_tail_call) 18850 visit_tailcall_insn(env, t); 18851 } else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) { 18852 struct bpf_kfunc_call_arg_meta meta; 18853 18854 ret = fetch_kfunc_arg_meta(env, insn->imm, insn->off, &meta); 18855 if (ret == 0 && is_iter_next_kfunc(&meta)) { 18856 mark_prune_point(env, t); 18857 /* Checking and saving state checkpoints at iter_next() call 18858 * is crucial for fast convergence of open-coded iterator loop 18859 * logic, so we need to force it. If we don't do that, 18860 * is_state_visited() might skip saving a checkpoint, causing 18861 * unnecessarily long sequence of not checkpointed 18862 * instructions and jumps, leading to exhaustion of jump 18863 * history buffer, and potentially other undesired outcomes. 
18864 * It is expected that with correct open-coded iterators 18865 * convergence will happen quickly, so we don't run a risk of 18866 * exhausting memory. 18867 */ 18868 mark_force_checkpoint(env, t); 18869 } 18870 /* Same as helpers, if called in a non-sleepable context 18871 * program will be rejected anyway, so we should end up 18872 * with precise sleepable marks on subprogs, except for 18873 * dead code elimination. 18874 */ 18875 if (ret == 0 && is_kfunc_sleepable(&meta)) 18876 mark_subprog_might_sleep(env, t); 18877 if (ret == 0 && is_kfunc_pkt_changing(&meta)) 18878 mark_subprog_changes_pkt_data(env, t); 18879 } 18880 return visit_func_call_insn(t, insns, env, insn->src_reg == BPF_PSEUDO_CALL); 18881 18882 case BPF_JA: 18883 if (BPF_SRC(insn->code) == BPF_X) 18884 return visit_gotox_insn(t, env); 18885 18886 if (BPF_CLASS(insn->code) == BPF_JMP) 18887 off = insn->off; 18888 else 18889 off = insn->imm; 18890 18891 /* unconditional jump with single edge */ 18892 ret = push_insn(t, t + off + 1, FALLTHROUGH, env); 18893 if (ret) 18894 return ret; 18895 18896 mark_prune_point(env, t + off + 1); 18897 mark_jmp_point(env, t + off + 1); 18898 18899 return ret; 18900 18901 default: 18902 /* conditional jump with two edges */ 18903 mark_prune_point(env, t); 18904 if (is_may_goto_insn(insn)) 18905 mark_force_checkpoint(env, t); 18906 18907 ret = push_insn(t, t + 1, FALLTHROUGH, env); 18908 if (ret) 18909 return ret; 18910 18911 return push_insn(t, t + insn->off + 1, BRANCH, env); 18912 } 18913 } 18914 18915 /* non-recursive depth-first-search to detect loops in BPF program 18916 * loop == back-edge in directed graph 18917 */ 18918 static int check_cfg(struct bpf_verifier_env *env) 18919 { 18920 int insn_cnt = env->prog->len; 18921 int *insn_stack, *insn_state; 18922 int ex_insn_beg, i, ret = 0; 18923 18924 insn_state = env->cfg.insn_state = kvzalloc_objs(int, insn_cnt, 18925 GFP_KERNEL_ACCOUNT); 18926 if (!insn_state) 18927 return -ENOMEM; 18928 18929 insn_stack = 
env->cfg.insn_stack = kvzalloc_objs(int, insn_cnt, 18930 GFP_KERNEL_ACCOUNT); 18931 if (!insn_stack) { 18932 kvfree(insn_state); 18933 return -ENOMEM; 18934 } 18935 18936 ex_insn_beg = env->exception_callback_subprog 18937 ? env->subprog_info[env->exception_callback_subprog].start 18938 : 0; 18939 18940 insn_state[0] = DISCOVERED; /* mark 1st insn as discovered */ 18941 insn_stack[0] = 0; /* 0 is the first instruction */ 18942 env->cfg.cur_stack = 1; 18943 18944 walk_cfg: 18945 while (env->cfg.cur_stack > 0) { 18946 int t = insn_stack[env->cfg.cur_stack - 1]; 18947 18948 ret = visit_insn(t, env); 18949 switch (ret) { 18950 case DONE_EXPLORING: 18951 insn_state[t] = EXPLORED; 18952 env->cfg.cur_stack--; 18953 break; 18954 case KEEP_EXPLORING: 18955 break; 18956 default: 18957 if (ret > 0) { 18958 verifier_bug(env, "visit_insn internal bug"); 18959 ret = -EFAULT; 18960 } 18961 goto err_free; 18962 } 18963 } 18964 18965 if (env->cfg.cur_stack < 0) { 18966 verifier_bug(env, "pop stack internal bug"); 18967 ret = -EFAULT; 18968 goto err_free; 18969 } 18970 18971 if (ex_insn_beg && insn_state[ex_insn_beg] != EXPLORED) { 18972 insn_state[ex_insn_beg] = DISCOVERED; 18973 insn_stack[0] = ex_insn_beg; 18974 env->cfg.cur_stack = 1; 18975 goto walk_cfg; 18976 } 18977 18978 for (i = 0; i < insn_cnt; i++) { 18979 struct bpf_insn *insn = &env->prog->insnsi[i]; 18980 18981 if (insn_state[i] != EXPLORED) { 18982 verbose(env, "unreachable insn %d\n", i); 18983 ret = -EINVAL; 18984 goto err_free; 18985 } 18986 if (bpf_is_ldimm64(insn)) { 18987 if (insn_state[i + 1] != 0) { 18988 verbose(env, "jump into the middle of ldimm64 insn %d\n", i); 18989 ret = -EINVAL; 18990 goto err_free; 18991 } 18992 i++; /* skip second half of ldimm64 */ 18993 } 18994 } 18995 ret = 0; /* cfg looks good */ 18996 env->prog->aux->changes_pkt_data = env->subprog_info[0].changes_pkt_data; 18997 env->prog->aux->might_sleep = env->subprog_info[0].might_sleep; 18998 18999 err_free: 19000 kvfree(insn_state); 19001 
kvfree(insn_stack); 19002 env->cfg.insn_state = env->cfg.insn_stack = NULL; 19003 return ret; 19004 } 19005 19006 /* 19007 * For each subprogram 'i' fill array env->cfg.insn_subprogram sub-range 19008 * [env->subprog_info[i].postorder_start, env->subprog_info[i+1].postorder_start) 19009 * with indices of 'i' instructions in postorder. 19010 */ 19011 static int compute_postorder(struct bpf_verifier_env *env) 19012 { 19013 u32 cur_postorder, i, top, stack_sz, s; 19014 int *stack = NULL, *postorder = NULL, *state = NULL; 19015 struct bpf_iarray *succ; 19016 19017 postorder = kvzalloc_objs(int, env->prog->len, GFP_KERNEL_ACCOUNT); 19018 state = kvzalloc_objs(int, env->prog->len, GFP_KERNEL_ACCOUNT); 19019 stack = kvzalloc_objs(int, env->prog->len, GFP_KERNEL_ACCOUNT); 19020 if (!postorder || !state || !stack) { 19021 kvfree(postorder); 19022 kvfree(state); 19023 kvfree(stack); 19024 return -ENOMEM; 19025 } 19026 cur_postorder = 0; 19027 for (i = 0; i < env->subprog_cnt; i++) { 19028 env->subprog_info[i].postorder_start = cur_postorder; 19029 stack[0] = env->subprog_info[i].start; 19030 stack_sz = 1; 19031 do { 19032 top = stack[stack_sz - 1]; 19033 state[top] |= DISCOVERED; 19034 if (state[top] & EXPLORED) { 19035 postorder[cur_postorder++] = top; 19036 stack_sz--; 19037 continue; 19038 } 19039 succ = bpf_insn_successors(env, top); 19040 for (s = 0; s < succ->cnt; ++s) { 19041 if (!state[succ->items[s]]) { 19042 stack[stack_sz++] = succ->items[s]; 19043 state[succ->items[s]] |= DISCOVERED; 19044 } 19045 } 19046 state[top] |= EXPLORED; 19047 } while (stack_sz); 19048 } 19049 env->subprog_info[i].postorder_start = cur_postorder; 19050 env->cfg.insn_postorder = postorder; 19051 env->cfg.cur_postorder = cur_postorder; 19052 kvfree(stack); 19053 kvfree(state); 19054 return 0; 19055 } 19056 19057 static int check_abnormal_return(struct bpf_verifier_env *env) 19058 { 19059 int i; 19060 19061 for (i = 1; i < env->subprog_cnt; i++) { 19062 if (env->subprog_info[i].has_ld_abs) { 
19063 verbose(env, "LD_ABS is not allowed in subprogs without BTF\n"); 19064 return -EINVAL; 19065 } 19066 if (env->subprog_info[i].has_tail_call) { 19067 verbose(env, "tail_call is not allowed in subprogs without BTF\n"); 19068 return -EINVAL; 19069 } 19070 } 19071 return 0; 19072 } 19073 19074 /* The minimum supported BTF func info size */ 19075 #define MIN_BPF_FUNCINFO_SIZE 8 19076 #define MAX_FUNCINFO_REC_SIZE 252 19077 19078 static int check_btf_func_early(struct bpf_verifier_env *env, 19079 const union bpf_attr *attr, 19080 bpfptr_t uattr) 19081 { 19082 u32 krec_size = sizeof(struct bpf_func_info); 19083 const struct btf_type *type, *func_proto; 19084 u32 i, nfuncs, urec_size, min_size; 19085 struct bpf_func_info *krecord; 19086 struct bpf_prog *prog; 19087 const struct btf *btf; 19088 u32 prev_offset = 0; 19089 bpfptr_t urecord; 19090 int ret = -ENOMEM; 19091 19092 nfuncs = attr->func_info_cnt; 19093 if (!nfuncs) { 19094 if (check_abnormal_return(env)) 19095 return -EINVAL; 19096 return 0; 19097 } 19098 19099 urec_size = attr->func_info_rec_size; 19100 if (urec_size < MIN_BPF_FUNCINFO_SIZE || 19101 urec_size > MAX_FUNCINFO_REC_SIZE || 19102 urec_size % sizeof(u32)) { 19103 verbose(env, "invalid func info rec size %u\n", urec_size); 19104 return -EINVAL; 19105 } 19106 19107 prog = env->prog; 19108 btf = prog->aux->btf; 19109 19110 urecord = make_bpfptr(attr->func_info, uattr.is_kernel); 19111 min_size = min_t(u32, krec_size, urec_size); 19112 19113 krecord = kvcalloc(nfuncs, krec_size, GFP_KERNEL_ACCOUNT | __GFP_NOWARN); 19114 if (!krecord) 19115 return -ENOMEM; 19116 19117 for (i = 0; i < nfuncs; i++) { 19118 ret = bpf_check_uarg_tail_zero(urecord, krec_size, urec_size); 19119 if (ret) { 19120 if (ret == -E2BIG) { 19121 verbose(env, "nonzero tailing record in func info"); 19122 /* set the size kernel expects so loader can zero 19123 * out the rest of the record. 
19124 */ 19125 if (copy_to_bpfptr_offset(uattr, 19126 offsetof(union bpf_attr, func_info_rec_size), 19127 &min_size, sizeof(min_size))) 19128 ret = -EFAULT; 19129 } 19130 goto err_free; 19131 } 19132 19133 if (copy_from_bpfptr(&krecord[i], urecord, min_size)) { 19134 ret = -EFAULT; 19135 goto err_free; 19136 } 19137 19138 /* check insn_off */ 19139 ret = -EINVAL; 19140 if (i == 0) { 19141 if (krecord[i].insn_off) { 19142 verbose(env, 19143 "nonzero insn_off %u for the first func info record", 19144 krecord[i].insn_off); 19145 goto err_free; 19146 } 19147 } else if (krecord[i].insn_off <= prev_offset) { 19148 verbose(env, 19149 "same or smaller insn offset (%u) than previous func info record (%u)", 19150 krecord[i].insn_off, prev_offset); 19151 goto err_free; 19152 } 19153 19154 /* check type_id */ 19155 type = btf_type_by_id(btf, krecord[i].type_id); 19156 if (!type || !btf_type_is_func(type)) { 19157 verbose(env, "invalid type id %d in func info", 19158 krecord[i].type_id); 19159 goto err_free; 19160 } 19161 19162 func_proto = btf_type_by_id(btf, type->type); 19163 if (unlikely(!func_proto || !btf_type_is_func_proto(func_proto))) 19164 /* btf_func_check() already verified it during BTF load */ 19165 goto err_free; 19166 19167 prev_offset = krecord[i].insn_off; 19168 bpfptr_add(&urecord, urec_size); 19169 } 19170 19171 prog->aux->func_info = krecord; 19172 prog->aux->func_info_cnt = nfuncs; 19173 return 0; 19174 19175 err_free: 19176 kvfree(krecord); 19177 return ret; 19178 } 19179 19180 static int check_btf_func(struct bpf_verifier_env *env, 19181 const union bpf_attr *attr, 19182 bpfptr_t uattr) 19183 { 19184 const struct btf_type *type, *func_proto, *ret_type; 19185 u32 i, nfuncs, urec_size; 19186 struct bpf_func_info *krecord; 19187 struct bpf_func_info_aux *info_aux = NULL; 19188 struct bpf_prog *prog; 19189 const struct btf *btf; 19190 bpfptr_t urecord; 19191 bool scalar_return; 19192 int ret = -ENOMEM; 19193 19194 nfuncs = attr->func_info_cnt; 19195 if 
(!nfuncs) { 19196 if (check_abnormal_return(env)) 19197 return -EINVAL; 19198 return 0; 19199 } 19200 if (nfuncs != env->subprog_cnt) { 19201 verbose(env, "number of funcs in func_info doesn't match number of subprogs\n"); 19202 return -EINVAL; 19203 } 19204 19205 urec_size = attr->func_info_rec_size; 19206 19207 prog = env->prog; 19208 btf = prog->aux->btf; 19209 19210 urecord = make_bpfptr(attr->func_info, uattr.is_kernel); 19211 19212 krecord = prog->aux->func_info; 19213 info_aux = kzalloc_objs(*info_aux, nfuncs, 19214 GFP_KERNEL_ACCOUNT | __GFP_NOWARN); 19215 if (!info_aux) 19216 return -ENOMEM; 19217 19218 for (i = 0; i < nfuncs; i++) { 19219 /* check insn_off */ 19220 ret = -EINVAL; 19221 19222 if (env->subprog_info[i].start != krecord[i].insn_off) { 19223 verbose(env, "func_info BTF section doesn't match subprog layout in BPF program\n"); 19224 goto err_free; 19225 } 19226 19227 /* Already checked type_id */ 19228 type = btf_type_by_id(btf, krecord[i].type_id); 19229 info_aux[i].linkage = BTF_INFO_VLEN(type->info); 19230 /* Already checked func_proto */ 19231 func_proto = btf_type_by_id(btf, type->type); 19232 19233 ret_type = btf_type_skip_modifiers(btf, func_proto->type, NULL); 19234 scalar_return = 19235 btf_type_is_small_int(ret_type) || btf_is_any_enum(ret_type); 19236 if (i && !scalar_return && env->subprog_info[i].has_ld_abs) { 19237 verbose(env, "LD_ABS is only allowed in functions that return 'int'.\n"); 19238 goto err_free; 19239 } 19240 if (i && !scalar_return && env->subprog_info[i].has_tail_call) { 19241 verbose(env, "tail_call is only allowed in functions that return 'int'.\n"); 19242 goto err_free; 19243 } 19244 19245 bpfptr_add(&urecord, urec_size); 19246 } 19247 19248 prog->aux->func_info_aux = info_aux; 19249 return 0; 19250 19251 err_free: 19252 kfree(info_aux); 19253 return ret; 19254 } 19255 19256 static void adjust_btf_func(struct bpf_verifier_env *env) 19257 { 19258 struct bpf_prog_aux *aux = env->prog->aux; 19259 int i; 19260 19261 
if (!aux->func_info) 19262 return; 19263 19264 /* func_info is not available for hidden subprogs */ 19265 for (i = 0; i < env->subprog_cnt - env->hidden_subprog_cnt; i++) 19266 aux->func_info[i].insn_off = env->subprog_info[i].start; 19267 } 19268 19269 #define MIN_BPF_LINEINFO_SIZE offsetofend(struct bpf_line_info, line_col) 19270 #define MAX_LINEINFO_REC_SIZE MAX_FUNCINFO_REC_SIZE 19271 19272 static int check_btf_line(struct bpf_verifier_env *env, 19273 const union bpf_attr *attr, 19274 bpfptr_t uattr) 19275 { 19276 u32 i, s, nr_linfo, ncopy, expected_size, rec_size, prev_offset = 0; 19277 struct bpf_subprog_info *sub; 19278 struct bpf_line_info *linfo; 19279 struct bpf_prog *prog; 19280 const struct btf *btf; 19281 bpfptr_t ulinfo; 19282 int err; 19283 19284 nr_linfo = attr->line_info_cnt; 19285 if (!nr_linfo) 19286 return 0; 19287 if (nr_linfo > INT_MAX / sizeof(struct bpf_line_info)) 19288 return -EINVAL; 19289 19290 rec_size = attr->line_info_rec_size; 19291 if (rec_size < MIN_BPF_LINEINFO_SIZE || 19292 rec_size > MAX_LINEINFO_REC_SIZE || 19293 rec_size & (sizeof(u32) - 1)) 19294 return -EINVAL; 19295 19296 /* Need to zero it in case the userspace may 19297 * pass in a smaller bpf_line_info object. 
19298 */ 19299 linfo = kvzalloc_objs(struct bpf_line_info, nr_linfo, 19300 GFP_KERNEL_ACCOUNT | __GFP_NOWARN); 19301 if (!linfo) 19302 return -ENOMEM; 19303 19304 prog = env->prog; 19305 btf = prog->aux->btf; 19306 19307 s = 0; 19308 sub = env->subprog_info; 19309 ulinfo = make_bpfptr(attr->line_info, uattr.is_kernel); 19310 expected_size = sizeof(struct bpf_line_info); 19311 ncopy = min_t(u32, expected_size, rec_size); 19312 for (i = 0; i < nr_linfo; i++) { 19313 err = bpf_check_uarg_tail_zero(ulinfo, expected_size, rec_size); 19314 if (err) { 19315 if (err == -E2BIG) { 19316 verbose(env, "nonzero tailing record in line_info"); 19317 if (copy_to_bpfptr_offset(uattr, 19318 offsetof(union bpf_attr, line_info_rec_size), 19319 &expected_size, sizeof(expected_size))) 19320 err = -EFAULT; 19321 } 19322 goto err_free; 19323 } 19324 19325 if (copy_from_bpfptr(&linfo[i], ulinfo, ncopy)) { 19326 err = -EFAULT; 19327 goto err_free; 19328 } 19329 19330 /* 19331 * Check insn_off to ensure 19332 * 1) strictly increasing AND 19333 * 2) bounded by prog->len 19334 * 19335 * The linfo[0].insn_off == 0 check logically falls into 19336 * the later "missing bpf_line_info for func..." case 19337 * because the first linfo[0].insn_off must be the 19338 * first sub also and the first sub must have 19339 * subprog_info[0].start == 0. 
19340 */ 19341 if ((i && linfo[i].insn_off <= prev_offset) || 19342 linfo[i].insn_off >= prog->len) { 19343 verbose(env, "Invalid line_info[%u].insn_off:%u (prev_offset:%u prog->len:%u)\n", 19344 i, linfo[i].insn_off, prev_offset, 19345 prog->len); 19346 err = -EINVAL; 19347 goto err_free; 19348 } 19349 19350 if (!prog->insnsi[linfo[i].insn_off].code) { 19351 verbose(env, 19352 "Invalid insn code at line_info[%u].insn_off\n", 19353 i); 19354 err = -EINVAL; 19355 goto err_free; 19356 } 19357 19358 if (!btf_name_by_offset(btf, linfo[i].line_off) || 19359 !btf_name_by_offset(btf, linfo[i].file_name_off)) { 19360 verbose(env, "Invalid line_info[%u].line_off or .file_name_off\n", i); 19361 err = -EINVAL; 19362 goto err_free; 19363 } 19364 19365 if (s != env->subprog_cnt) { 19366 if (linfo[i].insn_off == sub[s].start) { 19367 sub[s].linfo_idx = i; 19368 s++; 19369 } else if (sub[s].start < linfo[i].insn_off) { 19370 verbose(env, "missing bpf_line_info for func#%u\n", s); 19371 err = -EINVAL; 19372 goto err_free; 19373 } 19374 } 19375 19376 prev_offset = linfo[i].insn_off; 19377 bpfptr_add(&ulinfo, rec_size); 19378 } 19379 19380 if (s != env->subprog_cnt) { 19381 verbose(env, "missing bpf_line_info for %u funcs starting from func#%u\n", 19382 env->subprog_cnt - s, s); 19383 err = -EINVAL; 19384 goto err_free; 19385 } 19386 19387 prog->aux->linfo = linfo; 19388 prog->aux->nr_linfo = nr_linfo; 19389 19390 return 0; 19391 19392 err_free: 19393 kvfree(linfo); 19394 return err; 19395 } 19396 19397 #define MIN_CORE_RELO_SIZE sizeof(struct bpf_core_relo) 19398 #define MAX_CORE_RELO_SIZE MAX_FUNCINFO_REC_SIZE 19399 19400 static int check_core_relo(struct bpf_verifier_env *env, 19401 const union bpf_attr *attr, 19402 bpfptr_t uattr) 19403 { 19404 u32 i, nr_core_relo, ncopy, expected_size, rec_size; 19405 struct bpf_core_relo core_relo = {}; 19406 struct bpf_prog *prog = env->prog; 19407 const struct btf *btf = prog->aux->btf; 19408 struct bpf_core_ctx ctx = { 19409 .log = 
&env->log, 19410 .btf = btf, 19411 }; 19412 bpfptr_t u_core_relo; 19413 int err; 19414 19415 nr_core_relo = attr->core_relo_cnt; 19416 if (!nr_core_relo) 19417 return 0; 19418 if (nr_core_relo > INT_MAX / sizeof(struct bpf_core_relo)) 19419 return -EINVAL; 19420 19421 rec_size = attr->core_relo_rec_size; 19422 if (rec_size < MIN_CORE_RELO_SIZE || 19423 rec_size > MAX_CORE_RELO_SIZE || 19424 rec_size % sizeof(u32)) 19425 return -EINVAL; 19426 19427 u_core_relo = make_bpfptr(attr->core_relos, uattr.is_kernel); 19428 expected_size = sizeof(struct bpf_core_relo); 19429 ncopy = min_t(u32, expected_size, rec_size); 19430 19431 /* Unlike func_info and line_info, copy and apply each CO-RE 19432 * relocation record one at a time. 19433 */ 19434 for (i = 0; i < nr_core_relo; i++) { 19435 /* future proofing when sizeof(bpf_core_relo) changes */ 19436 err = bpf_check_uarg_tail_zero(u_core_relo, expected_size, rec_size); 19437 if (err) { 19438 if (err == -E2BIG) { 19439 verbose(env, "nonzero tailing record in core_relo"); 19440 if (copy_to_bpfptr_offset(uattr, 19441 offsetof(union bpf_attr, core_relo_rec_size), 19442 &expected_size, sizeof(expected_size))) 19443 err = -EFAULT; 19444 } 19445 break; 19446 } 19447 19448 if (copy_from_bpfptr(&core_relo, u_core_relo, ncopy)) { 19449 err = -EFAULT; 19450 break; 19451 } 19452 19453 if (core_relo.insn_off % 8 || core_relo.insn_off / 8 >= prog->len) { 19454 verbose(env, "Invalid core_relo[%u].insn_off:%u prog->len:%u\n", 19455 i, core_relo.insn_off, prog->len); 19456 err = -EINVAL; 19457 break; 19458 } 19459 19460 err = bpf_core_apply(&ctx, &core_relo, i, 19461 &prog->insnsi[core_relo.insn_off / 8]); 19462 if (err) 19463 break; 19464 bpfptr_add(&u_core_relo, rec_size); 19465 } 19466 return err; 19467 } 19468 19469 static int check_btf_info_early(struct bpf_verifier_env *env, 19470 const union bpf_attr *attr, 19471 bpfptr_t uattr) 19472 { 19473 struct btf *btf; 19474 int err; 19475 19476 if (!attr->func_info_cnt && !attr->line_info_cnt) 
{ 19477 if (check_abnormal_return(env)) 19478 return -EINVAL; 19479 return 0; 19480 } 19481 19482 btf = btf_get_by_fd(attr->prog_btf_fd); 19483 if (IS_ERR(btf)) 19484 return PTR_ERR(btf); 19485 if (btf_is_kernel(btf)) { 19486 btf_put(btf); 19487 return -EACCES; 19488 } 19489 env->prog->aux->btf = btf; 19490 19491 err = check_btf_func_early(env, attr, uattr); 19492 if (err) 19493 return err; 19494 return 0; 19495 } 19496 19497 static int check_btf_info(struct bpf_verifier_env *env, 19498 const union bpf_attr *attr, 19499 bpfptr_t uattr) 19500 { 19501 int err; 19502 19503 if (!attr->func_info_cnt && !attr->line_info_cnt) { 19504 if (check_abnormal_return(env)) 19505 return -EINVAL; 19506 return 0; 19507 } 19508 19509 err = check_btf_func(env, attr, uattr); 19510 if (err) 19511 return err; 19512 19513 err = check_btf_line(env, attr, uattr); 19514 if (err) 19515 return err; 19516 19517 err = check_core_relo(env, attr, uattr); 19518 if (err) 19519 return err; 19520 19521 return 0; 19522 } 19523 19524 /* check %cur's range satisfies %old's */ 19525 static bool range_within(const struct bpf_reg_state *old, 19526 const struct bpf_reg_state *cur) 19527 { 19528 return old->umin_value <= cur->umin_value && 19529 old->umax_value >= cur->umax_value && 19530 old->smin_value <= cur->smin_value && 19531 old->smax_value >= cur->smax_value && 19532 old->u32_min_value <= cur->u32_min_value && 19533 old->u32_max_value >= cur->u32_max_value && 19534 old->s32_min_value <= cur->s32_min_value && 19535 old->s32_max_value >= cur->s32_max_value; 19536 } 19537 19538 /* If in the old state two registers had the same id, then they need to have 19539 * the same id in the new state as well. But that id could be different from 19540 * the old state, so we need to track the mapping from old to new ids. 19541 * Once we have seen that, say, a reg with old id 5 had new id 9, any subsequent 19542 * regs with old id 5 must also have new id 9 for the new state to be safe. 
But 19543 * regs with a different old id could still have new id 9, we don't care about 19544 * that. 19545 * So we look through our idmap to see if this old id has been seen before. If 19546 * so, we require the new id to match; otherwise, we add the id pair to the map. 19547 */ 19548 static bool check_ids(u32 old_id, u32 cur_id, struct bpf_idmap *idmap) 19549 { 19550 struct bpf_id_pair *map = idmap->map; 19551 unsigned int i; 19552 19553 /* either both IDs should be set or both should be zero */ 19554 if (!!old_id != !!cur_id) 19555 return false; 19556 19557 if (old_id == 0) /* cur_id == 0 as well */ 19558 return true; 19559 19560 for (i = 0; i < idmap->cnt; i++) { 19561 if (map[i].old == old_id) 19562 return map[i].cur == cur_id; 19563 if (map[i].cur == cur_id) 19564 return false; 19565 } 19566 19567 /* Reached the end of known mappings; haven't seen this id before */ 19568 if (idmap->cnt < BPF_ID_MAP_SIZE) { 19569 map[idmap->cnt].old = old_id; 19570 map[idmap->cnt].cur = cur_id; 19571 idmap->cnt++; 19572 return true; 19573 } 19574 19575 /* We ran out of idmap slots, which should be impossible */ 19576 WARN_ON_ONCE(1); 19577 return false; 19578 } 19579 19580 /* 19581 * Compare scalar register IDs for state equivalence. 19582 * 19583 * When old_id == 0, the old register is independent - not linked to any 19584 * other register. Any linking in the current state only adds constraints, 19585 * making it more restrictive. Since the old state didn't rely on any ID 19586 * relationships for this register, it's always safe to accept cur regardless 19587 * of its ID. Hence, return true immediately. 19588 * 19589 * When old_id != 0 but cur_id == 0, we need to ensure that different 19590 * independent registers in cur don't incorrectly satisfy the ID matching 19591 * requirements of linked registers in old. 
19592 * 19593 * Example: if old has r6.id=X and r7.id=X (linked), but cur has r6.id=0 19594 * and r7.id=0 (both independent), without temp IDs both would map old_id=X 19595 * to cur_id=0 and pass. With temp IDs: r6 maps X->temp1, r7 tries to map 19596 * X->temp2, but X is already mapped to temp1, so the check fails correctly. 19597 */ 19598 static bool check_scalar_ids(u32 old_id, u32 cur_id, struct bpf_idmap *idmap) 19599 { 19600 if (!old_id) 19601 return true; 19602 19603 cur_id = cur_id ? cur_id : ++idmap->tmp_id_gen; 19604 19605 return check_ids(old_id, cur_id, idmap); 19606 } 19607 19608 static void clean_func_state(struct bpf_verifier_env *env, 19609 struct bpf_func_state *st, 19610 u32 ip) 19611 { 19612 u16 live_regs = env->insn_aux_data[ip].live_regs_before; 19613 int i, j; 19614 19615 for (i = 0; i < BPF_REG_FP; i++) { 19616 /* liveness must not touch this register anymore */ 19617 if (!(live_regs & BIT(i))) 19618 /* since the register is unused, clear its state 19619 * to make further comparison simpler 19620 */ 19621 __mark_reg_not_init(env, &st->regs[i]); 19622 } 19623 19624 for (i = 0; i < st->allocated_stack / BPF_REG_SIZE; i++) { 19625 if (!bpf_stack_slot_alive(env, st->frameno, i)) { 19626 __mark_reg_not_init(env, &st->stack[i].spilled_ptr); 19627 for (j = 0; j < BPF_REG_SIZE; j++) 19628 st->stack[i].slot_type[j] = STACK_INVALID; 19629 } 19630 } 19631 } 19632 19633 static void clean_verifier_state(struct bpf_verifier_env *env, 19634 struct bpf_verifier_state *st) 19635 { 19636 int i, ip; 19637 19638 bpf_live_stack_query_init(env, st); 19639 st->cleaned = true; 19640 for (i = 0; i <= st->curframe; i++) { 19641 ip = frame_insn_idx(st, i); 19642 clean_func_state(env, st->frame[i], ip); 19643 } 19644 } 19645 19646 /* the parentage chains form a tree. 19647 * the verifier states are added to state lists at given insn and 19648 * pushed into state stack for future exploration. 
 * when the verifier reaches bpf_exit insn some of the verifier states
 * stored in the state lists have their final liveness state already,
 * but a lot of states will get revised from liveness point of view when
 * the verifier explores other branches.
 * Example:
 * 1: *(u64)(r10 - 8) = 1
 * 2: if r1 == 100 goto pc+1
 * 3: *(u64)(r10 - 8) = 2
 * 4: r0 = *(u64)(r10 - 8)
 * 5: exit
 * when the verifier reaches exit insn the stack slot -8 in the state list of
 * insn 2 is not yet marked alive. Then the verifier pops the other_branch
 * of insn 2 and goes exploring further. After the insn 4 read, liveness
 * analysis would propagate read mark for -8 at insn 2.
 *
 * Since the verifier pushes the branch states as it sees them while exploring
 * the program the condition of walking the branch instruction for the second
 * time means that all states below this branch were already explored and
 * their final liveness marks are already propagated.
 * Hence when the verifier completes the search of state list in is_state_visited()
 * we can call this clean_live_states() function to clear the dead registers and
 * stack slots to simplify state merging.
 *
 * Important note here that walking the same branch instruction in the callee
 * doesn't mean that the states are DONE.
The verifier has to compare 19674 * the callsites 19675 */ 19676 19677 /* Find id in idset and increment its count, or add new entry */ 19678 static void idset_cnt_inc(struct bpf_idset *idset, u32 id) 19679 { 19680 u32 i; 19681 19682 for (i = 0; i < idset->num_ids; i++) { 19683 if (idset->entries[i].id == id) { 19684 idset->entries[i].cnt++; 19685 return; 19686 } 19687 } 19688 /* New id */ 19689 if (idset->num_ids < BPF_ID_MAP_SIZE) { 19690 idset->entries[idset->num_ids].id = id; 19691 idset->entries[idset->num_ids].cnt = 1; 19692 idset->num_ids++; 19693 } 19694 } 19695 19696 /* Find id in idset and return its count, or 0 if not found */ 19697 static u32 idset_cnt_get(struct bpf_idset *idset, u32 id) 19698 { 19699 u32 i; 19700 19701 for (i = 0; i < idset->num_ids; i++) { 19702 if (idset->entries[i].id == id) 19703 return idset->entries[i].cnt; 19704 } 19705 return 0; 19706 } 19707 19708 /* 19709 * Clear singular scalar ids in a state. 19710 * A register with a non-zero id is called singular if no other register shares 19711 * the same base id. Such registers can be treated as independent (id=0). 
19712 */ 19713 static void clear_singular_ids(struct bpf_verifier_env *env, 19714 struct bpf_verifier_state *st) 19715 { 19716 struct bpf_idset *idset = &env->idset_scratch; 19717 struct bpf_func_state *func; 19718 struct bpf_reg_state *reg; 19719 19720 idset->num_ids = 0; 19721 19722 bpf_for_each_reg_in_vstate(st, func, reg, ({ 19723 if (reg->type != SCALAR_VALUE) 19724 continue; 19725 if (!reg->id) 19726 continue; 19727 idset_cnt_inc(idset, reg->id & ~BPF_ADD_CONST); 19728 })); 19729 19730 bpf_for_each_reg_in_vstate(st, func, reg, ({ 19731 if (reg->type != SCALAR_VALUE) 19732 continue; 19733 if (!reg->id) 19734 continue; 19735 if (idset_cnt_get(idset, reg->id & ~BPF_ADD_CONST) == 1) { 19736 reg->id = 0; 19737 reg->off = 0; 19738 } 19739 })); 19740 } 19741 19742 static void clean_live_states(struct bpf_verifier_env *env, int insn, 19743 struct bpf_verifier_state *cur) 19744 { 19745 struct bpf_verifier_state_list *sl; 19746 struct list_head *pos, *head; 19747 19748 head = explored_state(env, insn); 19749 list_for_each(pos, head) { 19750 sl = container_of(pos, struct bpf_verifier_state_list, node); 19751 if (sl->state.branches) 19752 continue; 19753 if (sl->state.insn_idx != insn || 19754 !same_callsites(&sl->state, cur)) 19755 continue; 19756 if (sl->state.cleaned) 19757 /* all regs in this state in all frames were already marked */ 19758 continue; 19759 if (incomplete_read_marks(env, &sl->state)) 19760 continue; 19761 clean_verifier_state(env, &sl->state); 19762 } 19763 } 19764 19765 static bool regs_exact(const struct bpf_reg_state *rold, 19766 const struct bpf_reg_state *rcur, 19767 struct bpf_idmap *idmap) 19768 { 19769 return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 && 19770 check_ids(rold->id, rcur->id, idmap) && 19771 check_ids(rold->ref_obj_id, rcur->ref_obj_id, idmap); 19772 } 19773 19774 enum exact_level { 19775 NOT_EXACT, 19776 EXACT, 19777 RANGE_WITHIN 19778 }; 19779 19780 /* Returns true if (rold safe implies rcur safe) */ 19781 
static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold, 19782 struct bpf_reg_state *rcur, struct bpf_idmap *idmap, 19783 enum exact_level exact) 19784 { 19785 if (exact == EXACT) 19786 return regs_exact(rold, rcur, idmap); 19787 19788 if (rold->type == NOT_INIT) 19789 /* explored state can't have used this */ 19790 return true; 19791 19792 /* Enforce that register types have to match exactly, including their 19793 * modifiers (like PTR_MAYBE_NULL, MEM_RDONLY, etc), as a general 19794 * rule. 19795 * 19796 * One can make a point that using a pointer register as unbounded 19797 * SCALAR would be technically acceptable, but this could lead to 19798 * pointer leaks because scalars are allowed to leak while pointers 19799 * are not. We could make this safe in special cases if root is 19800 * calling us, but it's probably not worth the hassle. 19801 * 19802 * Also, register types that are *not* MAYBE_NULL could technically be 19803 * safe to use as their MAYBE_NULL variants (e.g., PTR_TO_MAP_VALUE 19804 * is safe to be used as PTR_TO_MAP_VALUE_OR_NULL, provided both point 19805 * to the same map). 19806 * However, if the old MAYBE_NULL register then got NULL checked, 19807 * doing so could have affected others with the same id, and we can't 19808 * check for that because we lost the id when we converted to 19809 * a non-MAYBE_NULL variant. 19810 * So, as a general rule we don't allow mixing MAYBE_NULL and 19811 * non-MAYBE_NULL registers as well. 
19812 */ 19813 if (rold->type != rcur->type) 19814 return false; 19815 19816 switch (base_type(rold->type)) { 19817 case SCALAR_VALUE: 19818 if (env->explore_alu_limits) { 19819 /* explore_alu_limits disables tnum_in() and range_within() 19820 * logic and requires everything to be strict 19821 */ 19822 return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 && 19823 check_scalar_ids(rold->id, rcur->id, idmap); 19824 } 19825 if (!rold->precise && exact == NOT_EXACT) 19826 return true; 19827 /* 19828 * Linked register tracking uses rold->id to detect relationships. 19829 * When rold->id == 0, the register is independent and any linking 19830 * in rcur only adds constraints. When rold->id != 0, we must verify 19831 * id mapping and (for BPF_ADD_CONST) offset consistency. 19832 * 19833 * +------------------+-----------+------------------+---------------+ 19834 * | | rold->id | rold + ADD_CONST | rold->id == 0 | 19835 * |------------------+-----------+------------------+---------------| 19836 * | rcur->id | range,ids | false | range | 19837 * | rcur + ADD_CONST | false | range,ids,off | range | 19838 * | rcur->id == 0 | range,ids | false | range | 19839 * +------------------+-----------+------------------+---------------+ 19840 * 19841 * Why check_ids() for scalar registers? 19842 * 19843 * Consider the following BPF code: 19844 * 1: r6 = ... unbound scalar, ID=a ... 19845 * 2: r7 = ... unbound scalar, ID=b ... 19846 * 3: if (r6 > r7) goto +1 19847 * 4: r6 = r7 19848 * 5: if (r6 > X) goto ... 19849 * 6: ... memory operation using r7 ... 19850 * 19851 * First verification path is [1-6]: 19852 * - at (4) same bpf_reg_state::id (b) would be assigned to r6 and r7; 19853 * - at (5) r6 would be marked <= X, sync_linked_regs() would also mark 19854 * r7 <= X, because r6 and r7 share same id. 19855 * Next verification path is [1-4, 6]. 19856 * 19857 * Instruction (6) would be reached in two states: 19858 * I. r6{.id=b}, r7{.id=b} via path 1-6; 19859 * II. 
r6{.id=a}, r7{.id=b} via path 1-4, 6. 19860 * 19861 * Use check_ids() to distinguish these states. 19862 * --- 19863 * Also verify that new value satisfies old value range knowledge. 19864 */ 19865 19866 /* ADD_CONST mismatch: different linking semantics */ 19867 if ((rold->id & BPF_ADD_CONST) && !(rcur->id & BPF_ADD_CONST)) 19868 return false; 19869 19870 if (rold->id && !(rold->id & BPF_ADD_CONST) && (rcur->id & BPF_ADD_CONST)) 19871 return false; 19872 19873 /* Both have offset linkage: offsets must match */ 19874 if ((rold->id & BPF_ADD_CONST) && rold->off != rcur->off) 19875 return false; 19876 19877 if (!check_scalar_ids(rold->id, rcur->id, idmap)) 19878 return false; 19879 19880 return range_within(rold, rcur) && tnum_in(rold->var_off, rcur->var_off); 19881 case PTR_TO_MAP_KEY: 19882 case PTR_TO_MAP_VALUE: 19883 case PTR_TO_MEM: 19884 case PTR_TO_BUF: 19885 case PTR_TO_TP_BUFFER: 19886 /* If the new min/max/var_off satisfy the old ones and 19887 * everything else matches, we are OK. 19888 */ 19889 return memcmp(rold, rcur, offsetof(struct bpf_reg_state, var_off)) == 0 && 19890 range_within(rold, rcur) && 19891 tnum_in(rold->var_off, rcur->var_off) && 19892 check_ids(rold->id, rcur->id, idmap) && 19893 check_ids(rold->ref_obj_id, rcur->ref_obj_id, idmap); 19894 case PTR_TO_PACKET_META: 19895 case PTR_TO_PACKET: 19896 /* We must have at least as much range as the old ptr 19897 * did, so that any accesses which were safe before are 19898 * still safe. This is true even if old range < old off, 19899 * since someone could have accessed through (ptr - k), or 19900 * even done ptr -= k in a register, to get a safe access. 19901 */ 19902 if (rold->range > rcur->range) 19903 return false; 19904 /* If the offsets don't match, we can't trust our alignment; 19905 * nor can we be sure that we won't fall out of range. 
19906 */ 19907 if (rold->off != rcur->off) 19908 return false; 19909 /* id relations must be preserved */ 19910 if (!check_ids(rold->id, rcur->id, idmap)) 19911 return false; 19912 /* new val must satisfy old val knowledge */ 19913 return range_within(rold, rcur) && 19914 tnum_in(rold->var_off, rcur->var_off); 19915 case PTR_TO_STACK: 19916 /* two stack pointers are equal only if they're pointing to 19917 * the same stack frame, since fp-8 in foo != fp-8 in bar 19918 */ 19919 return regs_exact(rold, rcur, idmap) && rold->frameno == rcur->frameno; 19920 case PTR_TO_ARENA: 19921 return true; 19922 case PTR_TO_INSN: 19923 return memcmp(rold, rcur, offsetof(struct bpf_reg_state, var_off)) == 0 && 19924 rold->off == rcur->off && range_within(rold, rcur) && 19925 tnum_in(rold->var_off, rcur->var_off); 19926 default: 19927 return regs_exact(rold, rcur, idmap); 19928 } 19929 } 19930 19931 static struct bpf_reg_state unbound_reg; 19932 19933 static __init int unbound_reg_init(void) 19934 { 19935 __mark_reg_unknown_imprecise(&unbound_reg); 19936 return 0; 19937 } 19938 late_initcall(unbound_reg_init); 19939 19940 static bool is_stack_all_misc(struct bpf_verifier_env *env, 19941 struct bpf_stack_state *stack) 19942 { 19943 u32 i; 19944 19945 for (i = 0; i < ARRAY_SIZE(stack->slot_type); ++i) { 19946 if ((stack->slot_type[i] == STACK_MISC) || 19947 (stack->slot_type[i] == STACK_INVALID && env->allow_uninit_stack)) 19948 continue; 19949 return false; 19950 } 19951 19952 return true; 19953 } 19954 19955 static struct bpf_reg_state *scalar_reg_for_stack(struct bpf_verifier_env *env, 19956 struct bpf_stack_state *stack) 19957 { 19958 if (is_spilled_scalar_reg64(stack)) 19959 return &stack->spilled_ptr; 19960 19961 if (is_stack_all_misc(env, stack)) 19962 return &unbound_reg; 19963 19964 return NULL; 19965 } 19966 19967 static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old, 19968 struct bpf_func_state *cur, struct bpf_idmap *idmap, 19969 enum exact_level 
exact) 19970 { 19971 int i, spi; 19972 19973 /* walk slots of the explored stack and ignore any additional 19974 * slots in the current stack, since explored(safe) state 19975 * didn't use them 19976 */ 19977 for (i = 0; i < old->allocated_stack; i++) { 19978 struct bpf_reg_state *old_reg, *cur_reg; 19979 19980 spi = i / BPF_REG_SIZE; 19981 19982 if (exact == EXACT && 19983 (i >= cur->allocated_stack || 19984 old->stack[spi].slot_type[i % BPF_REG_SIZE] != 19985 cur->stack[spi].slot_type[i % BPF_REG_SIZE])) 19986 return false; 19987 19988 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID) 19989 continue; 19990 19991 if (env->allow_uninit_stack && 19992 old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC) 19993 continue; 19994 19995 /* explored stack has more populated slots than current stack 19996 * and these slots were used 19997 */ 19998 if (i >= cur->allocated_stack) 19999 return false; 20000 20001 /* 64-bit scalar spill vs all slots MISC and vice versa. 20002 * Load from all slots MISC produces unbound scalar. 20003 * Construct a fake register for such stack and call 20004 * regsafe() to ensure scalar ids are compared. 20005 */ 20006 old_reg = scalar_reg_for_stack(env, &old->stack[spi]); 20007 cur_reg = scalar_reg_for_stack(env, &cur->stack[spi]); 20008 if (old_reg && cur_reg) { 20009 if (!regsafe(env, old_reg, cur_reg, idmap, exact)) 20010 return false; 20011 i += BPF_REG_SIZE - 1; 20012 continue; 20013 } 20014 20015 /* if old state was safe with misc data in the stack 20016 * it will be safe with zero-initialized stack. 
20017 * The opposite is not true 20018 */ 20019 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC && 20020 cur->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_ZERO) 20021 continue; 20022 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] != 20023 cur->stack[spi].slot_type[i % BPF_REG_SIZE]) 20024 /* Ex: old explored (safe) state has STACK_SPILL in 20025 * this stack slot, but current has STACK_MISC -> 20026 * this verifier states are not equivalent, 20027 * return false to continue verification of this path 20028 */ 20029 return false; 20030 if (i % BPF_REG_SIZE != BPF_REG_SIZE - 1) 20031 continue; 20032 /* Both old and cur are having same slot_type */ 20033 switch (old->stack[spi].slot_type[BPF_REG_SIZE - 1]) { 20034 case STACK_SPILL: 20035 /* when explored and current stack slot are both storing 20036 * spilled registers, check that stored pointers types 20037 * are the same as well. 20038 * Ex: explored safe path could have stored 20039 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8} 20040 * but current path has stored: 20041 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16} 20042 * such verifier states are not equivalent. 
20043 * return false to continue verification of this path 20044 */ 20045 if (!regsafe(env, &old->stack[spi].spilled_ptr, 20046 &cur->stack[spi].spilled_ptr, idmap, exact)) 20047 return false; 20048 break; 20049 case STACK_DYNPTR: 20050 old_reg = &old->stack[spi].spilled_ptr; 20051 cur_reg = &cur->stack[spi].spilled_ptr; 20052 if (old_reg->dynptr.type != cur_reg->dynptr.type || 20053 old_reg->dynptr.first_slot != cur_reg->dynptr.first_slot || 20054 !check_ids(old_reg->ref_obj_id, cur_reg->ref_obj_id, idmap)) 20055 return false; 20056 break; 20057 case STACK_ITER: 20058 old_reg = &old->stack[spi].spilled_ptr; 20059 cur_reg = &cur->stack[spi].spilled_ptr; 20060 /* iter.depth is not compared between states as it 20061 * doesn't matter for correctness and would otherwise 20062 * prevent convergence; we maintain it only to prevent 20063 * infinite loop check triggering, see 20064 * iter_active_depths_differ() 20065 */ 20066 if (old_reg->iter.btf != cur_reg->iter.btf || 20067 old_reg->iter.btf_id != cur_reg->iter.btf_id || 20068 old_reg->iter.state != cur_reg->iter.state || 20069 /* ignore {old_reg,cur_reg}->iter.depth, see above */ 20070 !check_ids(old_reg->ref_obj_id, cur_reg->ref_obj_id, idmap)) 20071 return false; 20072 break; 20073 case STACK_IRQ_FLAG: 20074 old_reg = &old->stack[spi].spilled_ptr; 20075 cur_reg = &cur->stack[spi].spilled_ptr; 20076 if (!check_ids(old_reg->ref_obj_id, cur_reg->ref_obj_id, idmap) || 20077 old_reg->irq.kfunc_class != cur_reg->irq.kfunc_class) 20078 return false; 20079 break; 20080 case STACK_MISC: 20081 case STACK_ZERO: 20082 case STACK_INVALID: 20083 continue; 20084 /* Ensure that new unhandled slot types return false by default */ 20085 default: 20086 return false; 20087 } 20088 } 20089 return true; 20090 } 20091 20092 static bool refsafe(struct bpf_verifier_state *old, struct bpf_verifier_state *cur, 20093 struct bpf_idmap *idmap) 20094 { 20095 int i; 20096 20097 if (old->acquired_refs != cur->acquired_refs) 20098 return false; 
20099 20100 if (old->active_locks != cur->active_locks) 20101 return false; 20102 20103 if (old->active_preempt_locks != cur->active_preempt_locks) 20104 return false; 20105 20106 if (old->active_rcu_locks != cur->active_rcu_locks) 20107 return false; 20108 20109 if (!check_ids(old->active_irq_id, cur->active_irq_id, idmap)) 20110 return false; 20111 20112 if (!check_ids(old->active_lock_id, cur->active_lock_id, idmap) || 20113 old->active_lock_ptr != cur->active_lock_ptr) 20114 return false; 20115 20116 for (i = 0; i < old->acquired_refs; i++) { 20117 if (!check_ids(old->refs[i].id, cur->refs[i].id, idmap) || 20118 old->refs[i].type != cur->refs[i].type) 20119 return false; 20120 switch (old->refs[i].type) { 20121 case REF_TYPE_PTR: 20122 case REF_TYPE_IRQ: 20123 break; 20124 case REF_TYPE_LOCK: 20125 case REF_TYPE_RES_LOCK: 20126 case REF_TYPE_RES_LOCK_IRQ: 20127 if (old->refs[i].ptr != cur->refs[i].ptr) 20128 return false; 20129 break; 20130 default: 20131 WARN_ONCE(1, "Unhandled enum type for reference state: %d\n", old->refs[i].type); 20132 return false; 20133 } 20134 } 20135 20136 return true; 20137 } 20138 20139 /* compare two verifier states 20140 * 20141 * all states stored in state_list are known to be valid, since 20142 * verifier reached 'bpf_exit' instruction through them 20143 * 20144 * this function is called when verifier exploring different branches of 20145 * execution popped from the state stack. If it sees an old state that has 20146 * more strict register state and more strict stack state then this execution 20147 * branch doesn't need to be explored further, since verifier already 20148 * concluded that more strict state leads to valid finish. 20149 * 20150 * Therefore two states are equivalent if register state is more conservative 20151 * and explored stack state is more conservative than the current one. 
20152 * Example: 20153 * explored current 20154 * (slot1=INV slot2=MISC) == (slot1=MISC slot2=MISC) 20155 * (slot1=MISC slot2=MISC) != (slot1=INV slot2=MISC) 20156 * 20157 * In other words if current stack state (one being explored) has more 20158 * valid slots than old one that already passed validation, it means 20159 * the verifier can stop exploring and conclude that current state is valid too 20160 * 20161 * Similarly with registers. If explored state has register type as invalid 20162 * whereas register type in current state is meaningful, it means that 20163 * the current state will reach 'bpf_exit' instruction safely 20164 */ 20165 static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_state *old, 20166 struct bpf_func_state *cur, u32 insn_idx, enum exact_level exact) 20167 { 20168 u16 live_regs = env->insn_aux_data[insn_idx].live_regs_before; 20169 u16 i; 20170 20171 if (old->callback_depth > cur->callback_depth) 20172 return false; 20173 20174 for (i = 0; i < MAX_BPF_REG; i++) 20175 if (((1 << i) & live_regs) && 20176 !regsafe(env, &old->regs[i], &cur->regs[i], 20177 &env->idmap_scratch, exact)) 20178 return false; 20179 20180 if (!stacksafe(env, old, cur, &env->idmap_scratch, exact)) 20181 return false; 20182 20183 return true; 20184 } 20185 20186 static void reset_idmap_scratch(struct bpf_verifier_env *env) 20187 { 20188 struct bpf_idmap *idmap = &env->idmap_scratch; 20189 20190 idmap->tmp_id_gen = env->id_gen; 20191 idmap->cnt = 0; 20192 } 20193 20194 static bool states_equal(struct bpf_verifier_env *env, 20195 struct bpf_verifier_state *old, 20196 struct bpf_verifier_state *cur, 20197 enum exact_level exact) 20198 { 20199 u32 insn_idx; 20200 int i; 20201 20202 if (old->curframe != cur->curframe) 20203 return false; 20204 20205 reset_idmap_scratch(env); 20206 20207 /* Verification state from speculative execution simulation 20208 * must never prune a non-speculative execution one. 
20209 */ 20210 if (old->speculative && !cur->speculative) 20211 return false; 20212 20213 if (old->in_sleepable != cur->in_sleepable) 20214 return false; 20215 20216 if (!refsafe(old, cur, &env->idmap_scratch)) 20217 return false; 20218 20219 /* for states to be equal callsites have to be the same 20220 * and all frame states need to be equivalent 20221 */ 20222 for (i = 0; i <= old->curframe; i++) { 20223 insn_idx = frame_insn_idx(old, i); 20224 if (old->frame[i]->callsite != cur->frame[i]->callsite) 20225 return false; 20226 if (!func_states_equal(env, old->frame[i], cur->frame[i], insn_idx, exact)) 20227 return false; 20228 } 20229 return true; 20230 } 20231 20232 /* find precise scalars in the previous equivalent state and 20233 * propagate them into the current state 20234 */ 20235 static int propagate_precision(struct bpf_verifier_env *env, 20236 const struct bpf_verifier_state *old, 20237 struct bpf_verifier_state *cur, 20238 bool *changed) 20239 { 20240 struct bpf_reg_state *state_reg; 20241 struct bpf_func_state *state; 20242 int i, err = 0, fr; 20243 bool first; 20244 20245 for (fr = old->curframe; fr >= 0; fr--) { 20246 state = old->frame[fr]; 20247 state_reg = state->regs; 20248 first = true; 20249 for (i = 0; i < BPF_REG_FP; i++, state_reg++) { 20250 if (state_reg->type != SCALAR_VALUE || 20251 !state_reg->precise) 20252 continue; 20253 if (env->log.level & BPF_LOG_LEVEL2) { 20254 if (first) 20255 verbose(env, "frame %d: propagating r%d", fr, i); 20256 else 20257 verbose(env, ",r%d", i); 20258 } 20259 bt_set_frame_reg(&env->bt, fr, i); 20260 first = false; 20261 } 20262 20263 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) { 20264 if (!is_spilled_reg(&state->stack[i])) 20265 continue; 20266 state_reg = &state->stack[i].spilled_ptr; 20267 if (state_reg->type != SCALAR_VALUE || 20268 !state_reg->precise) 20269 continue; 20270 if (env->log.level & BPF_LOG_LEVEL2) { 20271 if (first) 20272 verbose(env, "frame %d: propagating fp%d", 20273 fr, (-i 
- 1) * BPF_REG_SIZE); 20274 else 20275 verbose(env, ",fp%d", (-i - 1) * BPF_REG_SIZE); 20276 } 20277 bt_set_frame_slot(&env->bt, fr, i); 20278 first = false; 20279 } 20280 if (!first && (env->log.level & BPF_LOG_LEVEL2)) 20281 verbose(env, "\n"); 20282 } 20283 20284 err = __mark_chain_precision(env, cur, -1, changed); 20285 if (err < 0) 20286 return err; 20287 20288 return 0; 20289 } 20290 20291 #define MAX_BACKEDGE_ITERS 64 20292 20293 /* Propagate read and precision marks from visit->backedges[*].state->equal_state 20294 * to corresponding parent states of visit->backedges[*].state until fixed point is reached, 20295 * then free visit->backedges. 20296 * After execution of this function incomplete_read_marks() will return false 20297 * for all states corresponding to @visit->callchain. 20298 */ 20299 static int propagate_backedges(struct bpf_verifier_env *env, struct bpf_scc_visit *visit) 20300 { 20301 struct bpf_scc_backedge *backedge; 20302 struct bpf_verifier_state *st; 20303 bool changed; 20304 int i, err; 20305 20306 i = 0; 20307 do { 20308 if (i++ > MAX_BACKEDGE_ITERS) { 20309 if (env->log.level & BPF_LOG_LEVEL2) 20310 verbose(env, "%s: too many iterations\n", __func__); 20311 for (backedge = visit->backedges; backedge; backedge = backedge->next) 20312 mark_all_scalars_precise(env, &backedge->state); 20313 break; 20314 } 20315 changed = false; 20316 for (backedge = visit->backedges; backedge; backedge = backedge->next) { 20317 st = &backedge->state; 20318 err = propagate_precision(env, st->equal_state, st, &changed); 20319 if (err) 20320 return err; 20321 } 20322 } while (changed); 20323 20324 free_backedges(visit); 20325 return 0; 20326 } 20327 20328 static bool states_maybe_looping(struct bpf_verifier_state *old, 20329 struct bpf_verifier_state *cur) 20330 { 20331 struct bpf_func_state *fold, *fcur; 20332 int i, fr = cur->curframe; 20333 20334 if (old->curframe != fr) 20335 return false; 20336 20337 fold = old->frame[fr]; 20338 fcur = cur->frame[fr]; 
20339 for (i = 0; i < MAX_BPF_REG; i++) 20340 if (memcmp(&fold->regs[i], &fcur->regs[i], 20341 offsetof(struct bpf_reg_state, frameno))) 20342 return false; 20343 return true; 20344 } 20345 20346 static bool is_iter_next_insn(struct bpf_verifier_env *env, int insn_idx) 20347 { 20348 return env->insn_aux_data[insn_idx].is_iter_next; 20349 } 20350 20351 /* is_state_visited() handles iter_next() (see process_iter_next_call() for 20352 * terminology) calls specially: as opposed to bounded BPF loops, it *expects* 20353 * states to match, which otherwise would look like an infinite loop. So while 20354 * iter_next() calls are taken care of, we still need to be careful and 20355 * prevent erroneous and too eager declaration of "infinite loop", when 20356 * iterators are involved. 20357 * 20358 * Here's a situation in pseudo-BPF assembly form: 20359 * 20360 * 0: again: ; set up iter_next() call args 20361 * 1: r1 = &it ; <CHECKPOINT HERE> 20362 * 2: call bpf_iter_num_next ; this is iter_next() call 20363 * 3: if r0 == 0 goto done 20364 * 4: ... something useful here ... 20365 * 5: goto again ; another iteration 20366 * 6: done: 20367 * 7: r1 = &it 20368 * 8: call bpf_iter_num_destroy ; clean up iter state 20369 * 9: exit 20370 * 20371 * This is a typical loop. Let's assume that we have a prune point at 1:, 20372 * before we get to `call bpf_iter_num_next` (e.g., because of that `goto 20373 * again`, assuming other heuristics don't get in a way). 20374 * 20375 * When we first time come to 1:, let's say we have some state X. We proceed 20376 * to 2:, fork states, enqueue ACTIVE, validate NULL case successfully, exit. 20377 * Now we come back to validate that forked ACTIVE state. We proceed through 20378 * 3-5, come to goto, jump to 1:. Let's assume our state didn't change, so we 20379 * are converging. But the problem is that we don't know that yet, as this 20380 * convergence has to happen at iter_next() call site only. 
/* is_state_visited() handles iter_next() (see process_iter_next_call() for
 * terminology) calls specially: as opposed to bounded BPF loops, it *expects*
 * states to match, which otherwise would look like an infinite loop. So while
 * iter_next() calls are taken care of, we still need to be careful and
 * prevent erroneous and too eager declaration of "infinite loop", when
 * iterators are involved.
 *
 * Here's a situation in pseudo-BPF assembly form:
 *
 *   0: again:                          ; set up iter_next() call args
 *   1:   r1 = &it                      ; <CHECKPOINT HERE>
 *   2:   call bpf_iter_num_next        ; this is iter_next() call
 *   3:   if r0 == 0 goto done
 *   4:   ... something useful here ...
 *   5:   goto again                    ; another iteration
 *   6: done:
 *   7:   r1 = &it
 *   8:   call bpf_iter_num_destroy     ; clean up iter state
 *   9:   exit
 *
 * This is a typical loop. Let's assume that we have a prune point at 1:,
 * before we get to `call bpf_iter_num_next` (e.g., because of that `goto
 * again`, assuming other heuristics don't get in a way).
 *
 * When we first time come to 1:, let's say we have some state X. We proceed
 * to 2:, fork states, enqueue ACTIVE, validate NULL case successfully, exit.
 * Now we come back to validate that forked ACTIVE state. We proceed through
 * 3-5, come to goto, jump to 1:. Let's assume our state didn't change, so we
 * are converging. But the problem is that we don't know that yet, as this
 * convergence has to happen at iter_next() call site only. So if nothing is
 * done, at 1: verifier will use bounded loop logic and declare infinite
 * looping (and would be *technically* correct, if not for iterator's
 * "eventual sticky NULL" contract, see process_iter_next_call()). But we
 * don't want that. So what we do in process_iter_next_call() when we go on
 * another ACTIVE iteration, we bump slot->iter.depth, to mark that it's
 * a different iteration. So when we suspect an infinite loop, we additionally
 * check if any of the *ACTIVE* iterator states' depths differ. If yes, we
 * pretend we are not looping and wait for next iter_next() call.
 *
 * This only applies to ACTIVE state. In DRAINED state we don't expect to
 * loop, because that would actually mean infinite loop, as DRAINED state is
 * "sticky", and so we'll keep returning into the same instruction with the
 * same state (at least in one of possible code paths).
 *
 * This approach allows keeping the infinite loop heuristic even in the face
 * of an active iterator. E.g., C snippet below is and will be detected as
 * infinitely looping:
 *
 *   struct bpf_iter_num it;
 *   int *p, x;
 *
 *   bpf_iter_num_new(&it, 0, 10);
 *   while ((p = bpf_iter_num_next(&it))) {
 *       x = *p;
 *       while (x--) {} // <<-- infinite loop here
 *   }
 *
 * Returns true if, in any frame of @old (from old->curframe down to 0), a
 * stack slot marked STACK_ITER whose iterator is in BPF_ITER_STATE_ACTIVE
 * has an iter.depth different from the slot with the same index in the same
 * frame of @cur. Returns false otherwise.
 */
static bool iter_active_depths_differ(struct bpf_verifier_state *old, struct bpf_verifier_state *cur)
{
	struct bpf_reg_state *slot, *cur_slot;
	struct bpf_func_state *state;
	int i, fr;

	for (fr = old->curframe; fr >= 0; fr--) {
		state = old->frame[fr];
		for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
			/* only slots that hold an iterator are of interest */
			if (state->stack[i].slot_type[0] != STACK_ITER)
				continue;

			slot = &state->stack[i].spilled_ptr;
			if (slot->iter.state != BPF_ITER_STATE_ACTIVE)
				continue;

			/* NOTE(review): cur's stack is indexed with old's slot
			 * index without a separate bounds check; the only caller
			 * visible here invokes this after states_equal(..., EXACT)
			 * succeeded - confirm that this guarantees matching
			 * stack layouts.
			 */
			cur_slot = &cur->frame[fr]->stack[i].spilled_ptr;
			if (cur_slot->iter.depth != slot->iter.depth)
				return true;
		}
	}
	return false;
}

/* Check whether the current verifier state at @insn_idx is covered by a state
 * that was already explored at the same instruction and, if not, optionally
 * record the current state as a new checkpoint.
 *
 * Return:
 *   1   - an equivalent explored state was found ("hit"); the caller may
 *         prune the current path. Precision marks of the matched state have
 *         been propagated into the current state and, when the match
 *         happened inside a not yet fully explored loop (@loop is true),
 *         a copy of the current state was queued as an SCC backedge.
 *   0   - no equivalent state; verification of the current path continues.
 *         A new checkpoint may or may not have been added, depending on the
 *         add_new_state heuristics below.
 *   < 0 - error: -EINVAL when an infinite loop is detected, -ENOMEM when an
 *         allocation fails, or an error propagated from a helper.
 */
static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
{
	struct bpf_verifier_state_list *new_sl;
	struct bpf_verifier_state_list *sl;
	struct bpf_verifier_state *cur = env->cur_state, *new;
	bool force_new_state, add_new_state, loop;
	int n, err, states_cnt = 0;
	struct list_head *pos, *tmp, *head;

	force_new_state = env->test_state_freq || is_force_checkpoint(env, insn_idx) ||
			  /* Avoid accumulating infinitely long jmp history */
			  cur->jmp_history_cnt > 40;

	/* bpf progs typically have pruning point every 4 instructions
	 * http://vger.kernel.org/bpfconf2019.html#session-1
	 * Do not add new state for future pruning if the verifier hasn't seen
	 * at least 2 jumps and at least 8 instructions.
	 * This heuristic helps decrease 'total_states' and 'peak_states' metric.
	 * In tests that amounts to up to 50% reduction in total verifier
	 * memory consumption and 20% verifier time speedup.
	 */
	add_new_state = force_new_state;
	if (env->jmps_processed - env->prev_jmps_processed >= 2 &&
	    env->insn_processed - env->prev_insn_processed >= 8)
		add_new_state = true;

	clean_live_states(env, insn_idx, cur);

	loop = false;
	head = explored_state(env, insn_idx);
	/* _safe variant: entries may be unlinked from the list at 'miss' below */
	list_for_each_safe(pos, tmp, head) {
		sl = container_of(pos, struct bpf_verifier_state_list, node);
		states_cnt++;
		if (sl->state.insn_idx != insn_idx)
			continue;

		/* branches != 0: the explored state still has unfinished paths */
		if (sl->state.branches) {
			struct bpf_func_state *frame = sl->state.frame[sl->state.curframe];

			if (frame->in_async_callback_fn &&
			    frame->async_entry_cnt != cur->frame[cur->curframe]->async_entry_cnt) {
				/* Different async_entry_cnt means that the verifier is
				 * processing another entry into async callback.
				 * Seeing the same state is not an indication of infinite
				 * loop or infinite recursion.
				 * But finding the same state doesn't mean that it's safe
				 * to stop processing the current state. The previous state
				 * hasn't yet reached bpf_exit, since state.branches > 0.
				 * Checking in_async_callback_fn alone is not enough either.
				 * Since the verifier still needs to catch infinite loops
				 * inside async callbacks.
				 */
				goto skip_inf_loop_check;
			}
			/* BPF open-coded iterators loop detection is special.
			 * states_maybe_looping() logic is too simplistic in detecting
			 * states that *might* be equivalent, because it doesn't know
			 * about ID remapping, so don't even perform it.
			 * See process_iter_next_call() and iter_active_depths_differ()
			 * for overview of the logic. When current and one of parent
			 * states are detected as equivalent, it's a good thing: we prove
			 * convergence and can stop simulating further iterations.
			 * It's safe to assume that iterator loop will finish, taking into
			 * account iter_next() contract of eventually returning
			 * sticky NULL result.
			 *
			 * Note, that states have to be compared exactly in this case because
			 * read and precision marks might not be finalized inside the loop.
			 * E.g. as in the program below:
			 *
			 *     1. r7 = -16
			 *     2. r6 = bpf_get_prandom_u32()
			 *     3. while (bpf_iter_num_next(&fp[-8])) {
			 *     4.   if (r6 != 42) {
			 *     5.     r7 = -32
			 *     6.     r6 = bpf_get_prandom_u32()
			 *     7.     continue
			 *     8.   }
			 *     9.   r0 = r10
			 *    10.   r0 += r7
			 *    11.   r8 = *(u64 *)(r0 + 0)
			 *    12.   r6 = bpf_get_prandom_u32()
			 *    13. }
			 *
			 * Here verifier would first visit path 1-3, create a checkpoint at 3
			 * with r7=-16, continue to 4-7,3. Existing checkpoint at 3 does
			 * not have read or precision mark for r7 yet, thus inexact states
			 * comparison would discard current state with r7=-32
			 * => unsafe memory access at 11 would not be caught.
			 */
			if (is_iter_next_insn(env, insn_idx)) {
				if (states_equal(env, &sl->state, cur, RANGE_WITHIN)) {
					struct bpf_func_state *cur_frame;
					struct bpf_reg_state *iter_state, *iter_reg;
					int spi;

					cur_frame = cur->frame[cur->curframe];
					/* btf_check_iter_kfuncs() enforces that
					 * iter state pointer is always the first arg
					 */
					iter_reg = &cur_frame->regs[BPF_REG_1];
					/* current state is valid due to states_equal(),
					 * so we can assume valid iter and reg state,
					 * no need for extra (re-)validations
					 */
					spi = __get_spi(iter_reg->off + iter_reg->var_off.value);
					iter_state = &func(env, iter_reg)->stack[spi].spilled_ptr;
					if (iter_state->iter.state == BPF_ITER_STATE_ACTIVE) {
						loop = true;
						goto hit;
					}
				}
				goto skip_inf_loop_check;
			}
			if (is_may_goto_insn_at(env, insn_idx)) {
				if (sl->state.may_goto_depth != cur->may_goto_depth &&
				    states_equal(env, &sl->state, cur, RANGE_WITHIN)) {
					loop = true;
					goto hit;
				}
			}
			if (bpf_calls_callback(env, insn_idx)) {
				if (states_equal(env, &sl->state, cur, RANGE_WITHIN)) {
					loop = true;
					goto hit;
				}
				goto skip_inf_loop_check;
			}
			/* attempt to detect infinite loop to avoid unnecessary doomed work */
			if (states_maybe_looping(&sl->state, cur) &&
			    states_equal(env, &sl->state, cur, EXACT) &&
			    !iter_active_depths_differ(&sl->state, cur) &&
			    sl->state.may_goto_depth == cur->may_goto_depth &&
			    sl->state.callback_unroll_depth == cur->callback_unroll_depth) {
				verbose_linfo(env, insn_idx, "; ");
				verbose(env, "infinite loop detected at insn %d\n", insn_idx);
				verbose(env, "cur state:");
				print_verifier_state(env, cur, cur->curframe, true);
				verbose(env, "old state:");
				print_verifier_state(env, &sl->state, cur->curframe, true);
				return -EINVAL;
			}
			/* if the verifier is processing a loop, avoid adding new state
			 * too often, since different loop iterations have distinct
			 * states and may not help future pruning.
			 * This threshold shouldn't be too low to make sure that
			 * a loop with large bound will be rejected quickly.
			 * The most abusive loop will be:
			 * r1 += 1
			 * if r1 < 1000000 goto pc-2
			 * 1M insn_processed limit / 100 == 10k peak states.
			 * This threshold shouldn't be too high either, since states
			 * at the end of the loop are likely to be useful in pruning.
			 */
skip_inf_loop_check:
			if (!force_new_state &&
			    env->jmps_processed - env->prev_jmps_processed < 20 &&
			    env->insn_processed - env->prev_insn_processed < 100)
				add_new_state = false;
			goto miss;
		}
		/* See comments for mark_all_regs_read_and_precise() */
		loop = incomplete_read_marks(env, &sl->state);
		if (states_equal(env, &sl->state, cur, loop ? RANGE_WITHIN : NOT_EXACT)) {
hit:
			sl->hit_cnt++;

			/* if previous state reached the exit with precision and
			 * current state is equivalent to it (except precision marks)
			 * the precision needs to be propagated back in
			 * the current state.
			 */
			err = 0;
			if (is_jmp_point(env, env->insn_idx))
				err = push_jmp_history(env, cur, 0, 0);
			err = err ? : propagate_precision(env, &sl->state, cur, NULL);
			if (err)
				return err;
			/* When processing iterator based loops above propagate_liveness and
			 * propagate_precision calls are not sufficient to transfer all relevant
			 * read and precision marks. E.g. consider the following case:
			 *
			 *  .-> A --.  Assume the states are visited in the order A, B, C.
			 *  |   |   |  Assume that state B reaches a state equivalent to state A.
			 *  |   v   v  At this point, state C is not processed yet, so state A
			 *  '-- B   C  has not received any read or precision marks from C.
			 *             Thus, marks propagated from A to B are incomplete.
			 *
			 * The verifier mitigates this by performing the following steps:
			 *
			 * - Prior to the main verification pass, strongly connected components
			 *   (SCCs) are computed over the program's control flow graph,
			 *   intraprocedurally.
			 *
			 * - During the main verification pass, `maybe_enter_scc()` checks
			 *   whether the current verifier state is entering an SCC. If so, an
			 *   instance of a `bpf_scc_visit` object is created, and the state
			 *   entering the SCC is recorded as the entry state.
			 *
			 * - This instance is associated not with the SCC itself, but with a
			 *   `bpf_scc_callchain`: a tuple consisting of the call sites leading to
			 *   the SCC and the SCC id. See `compute_scc_callchain()`.
			 *
			 * - When a verification path encounters a `states_equal(...,
			 *   RANGE_WITHIN)` condition, there exists a call chain describing the
			 *   current state and a corresponding `bpf_scc_visit` instance. A copy
			 *   of the current state is created and added to
			 *   `bpf_scc_visit->backedges`.
			 *
			 * - When a verification path terminates, `maybe_exit_scc()` is called
			 *   from `update_branch_counts()`. For states with `branches == 0`, it
			 *   checks whether the state is the entry state of any `bpf_scc_visit`
			 *   instance. If it is, this indicates that all paths originating from
			 *   this SCC visit have been explored. `propagate_backedges()` is then
			 *   called, which propagates read and precision marks through the
			 *   backedges until a fixed point is reached.
			 *   (In the earlier example, this would propagate marks from A to B,
			 *    from C to A, and then again from A to B.)
			 *
			 * A note on callchains
			 * --------------------
			 *
			 * Consider the following example:
			 *
			 *     void foo() { loop { ... SCC#1 ... } }
			 *     void main() {
			 *       A: foo();
			 *       B: ...
			 *       C: foo();
			 *     }
			 *
			 * Here, there are two distinct callchains leading to SCC#1:
			 * - (A, SCC#1)
			 * - (C, SCC#1)
			 *
			 * Each callchain identifies a separate `bpf_scc_visit` instance that
			 * accumulates backedge states. The `propagate_{liveness,precision}()`
			 * functions traverse the parent state of each backedge state, which
			 * means these parent states must remain valid (i.e., not freed) while
			 * the corresponding `bpf_scc_visit` instance exists.
			 *
			 * Associating `bpf_scc_visit` instances directly with SCCs instead of
			 * callchains would break this invariant:
			 * - States explored during `C: foo()` would contribute backedges to
			 *   SCC#1, but SCC#1 would only be exited once the exploration of
			 *   `A: foo()` completes.
			 * - By that time, the states explored between `A: foo()` and `C: foo()`
			 *   (i.e., `B: ...`) may have already been freed, causing the parent
			 *   links for states from `C: foo()` to become invalid.
			 */
			if (loop) {
				struct bpf_scc_backedge *backedge;

				backedge = kzalloc_obj(*backedge,
						       GFP_KERNEL_ACCOUNT);
				if (!backedge)
					return -ENOMEM;
				/* a copy failure is kept in err and handled below,
				 * after the remaining fields have been filled in
				 */
				err = copy_verifier_state(&backedge->state, cur);
				backedge->state.equal_state = &sl->state;
				backedge->state.insn_idx = insn_idx;
				err = err ?: add_scc_backedge(env, &sl->state, backedge);
				if (err) {
					free_verifier_state(&backedge->state, false);
					kfree(backedge);
					return err;
				}
			}
			return 1;
		}
miss:
		/* when new state is not going to be added do not increase miss count.
		 * Otherwise several loop iterations will remove the state
		 * recorded earlier. The goal of these heuristics is to have
		 * states from some iterations of the loop (some in the beginning
		 * and some at the end) to help pruning.
		 */
		if (add_new_state)
			sl->miss_cnt++;
		/* heuristic to determine whether this state is beneficial
		 * to keep checking from state equivalence point of view.
		 * Higher numbers increase max_states_per_insn and verification time,
		 * but do not meaningfully decrease insn_processed.
		 * 'n' controls how many times state could miss before eviction.
		 * Use bigger 'n' for checkpoints because evicting checkpoint states
		 * too early would hinder iterator convergence.
		 */
		n = is_force_checkpoint(env, insn_idx) && sl->state.branches > 0 ? 64 : 3;
		if (sl->miss_cnt > sl->hit_cnt * n + n) {
			/* the state is unlikely to be useful. Remove it to
			 * speed up verification
			 */
			sl->in_free_list = true;
			list_del(&sl->node);
			list_add(&sl->node, &env->free_list);
			env->free_list_size++;
			env->explored_states_size--;
			maybe_free_verifier_state(env, sl);
		}
	}

	if (env->max_states_per_insn < states_cnt)
		env->max_states_per_insn = states_cnt;

	/* without bpf_capable, stop recording checkpoints for this insn once
	 * the number of states seen exceeds BPF_COMPLEXITY_LIMIT_STATES
	 */
	if (!env->bpf_capable && states_cnt > BPF_COMPLEXITY_LIMIT_STATES)
		return 0;

	if (!add_new_state)
		return 0;

	/* There were no equivalent states, remember the current one.
	 * Technically the current state is not proven to be safe yet,
	 * but it will either reach outer most bpf_exit (which means it's safe)
	 * or it will be rejected. When there are no loops the verifier won't be
	 * seeing this tuple (frame[0].callsite, frame[1].callsite, .. insn_idx)
	 * again on the way to bpf_exit.
	 * When looping the sl->state.branches will be > 0 and this state
	 * will not be considered for equivalence until branches == 0.
	 */
	new_sl = kzalloc_obj(struct bpf_verifier_state_list, GFP_KERNEL_ACCOUNT);
	if (!new_sl)
		return -ENOMEM;
	env->total_states++;
	env->explored_states_size++;
	update_peak_states(env);
	env->prev_jmps_processed = env->jmps_processed;
	env->prev_insn_processed = env->insn_processed;

	/* forget precise markings we inherited, see __mark_chain_precision */
	if (env->bpf_capable)
		mark_all_scalars_imprecise(env, cur);

	clear_singular_ids(env, cur);

	/* add new state to the head of linked list */
	new = &new_sl->state;
	err = copy_verifier_state(new, cur);
	if (err) {
		free_verifier_state(new, false);
		kfree(new_sl);
		return err;
	}
	new->insn_idx = insn_idx;
	verifier_bug_if(new->branches != 1, env,
			"%s:branches_to_explore=%d insn %d",
			__func__, new->branches, insn_idx);
	err = maybe_enter_scc(env, new);
	if (err) {
		free_verifier_state(new, false);
		kfree(new_sl);
		return err;
	}

	/* the checkpoint becomes the parent of the state that keeps running */
	cur->parent = new;
	cur->first_insn_idx = insn_idx;
	cur->dfs_depth = new->dfs_depth + 1;
	clear_jmp_history(cur);
	list_add(&new_sl->node, head);
	return 0;
}
*/ 20794 static bool reg_type_mismatch_ok(enum bpf_reg_type type) 20795 { 20796 switch (base_type(type)) { 20797 case PTR_TO_CTX: 20798 case PTR_TO_SOCKET: 20799 case PTR_TO_SOCK_COMMON: 20800 case PTR_TO_TCP_SOCK: 20801 case PTR_TO_XDP_SOCK: 20802 case PTR_TO_BTF_ID: 20803 case PTR_TO_ARENA: 20804 return false; 20805 default: 20806 return true; 20807 } 20808 } 20809 20810 /* If an instruction was previously used with particular pointer types, then we 20811 * need to be careful to avoid cases such as the below, where it may be ok 20812 * for one branch accessing the pointer, but not ok for the other branch: 20813 * 20814 * R1 = sock_ptr 20815 * goto X; 20816 * ... 20817 * R1 = some_other_valid_ptr; 20818 * goto X; 20819 * ... 20820 * R2 = *(u32 *)(R1 + 0); 20821 */ 20822 static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev) 20823 { 20824 return src != prev && (!reg_type_mismatch_ok(src) || 20825 !reg_type_mismatch_ok(prev)); 20826 } 20827 20828 static bool is_ptr_to_mem_or_btf_id(enum bpf_reg_type type) 20829 { 20830 switch (base_type(type)) { 20831 case PTR_TO_MEM: 20832 case PTR_TO_BTF_ID: 20833 return true; 20834 default: 20835 return false; 20836 } 20837 } 20838 20839 static bool is_ptr_to_mem(enum bpf_reg_type type) 20840 { 20841 return base_type(type) == PTR_TO_MEM; 20842 } 20843 20844 static int save_aux_ptr_type(struct bpf_verifier_env *env, enum bpf_reg_type type, 20845 bool allow_trust_mismatch) 20846 { 20847 enum bpf_reg_type *prev_type = &env->insn_aux_data[env->insn_idx].ptr_type; 20848 enum bpf_reg_type merged_type; 20849 20850 if (*prev_type == NOT_INIT) { 20851 /* Saw a valid insn 20852 * dst_reg = *(u32 *)(src_reg + off) 20853 * save type to validate intersecting paths 20854 */ 20855 *prev_type = type; 20856 } else if (reg_type_mismatch(type, *prev_type)) { 20857 /* Abuser program is trying to use the same insn 20858 * dst_reg = *(u32*) (src_reg + off) 20859 * with different pointer types: 20860 * src_reg == ctx in one branch 
and 20861 * src_reg == stack|map in some other branch. 20862 * Reject it. 20863 */ 20864 if (allow_trust_mismatch && 20865 is_ptr_to_mem_or_btf_id(type) && 20866 is_ptr_to_mem_or_btf_id(*prev_type)) { 20867 /* 20868 * Have to support a use case when one path through 20869 * the program yields TRUSTED pointer while another 20870 * is UNTRUSTED. Fallback to UNTRUSTED to generate 20871 * BPF_PROBE_MEM/BPF_PROBE_MEMSX. 20872 * Same behavior of MEM_RDONLY flag. 20873 */ 20874 if (is_ptr_to_mem(type) || is_ptr_to_mem(*prev_type)) 20875 merged_type = PTR_TO_MEM; 20876 else 20877 merged_type = PTR_TO_BTF_ID; 20878 if ((type & PTR_UNTRUSTED) || (*prev_type & PTR_UNTRUSTED)) 20879 merged_type |= PTR_UNTRUSTED; 20880 if ((type & MEM_RDONLY) || (*prev_type & MEM_RDONLY)) 20881 merged_type |= MEM_RDONLY; 20882 *prev_type = merged_type; 20883 } else { 20884 verbose(env, "same insn cannot be used with different pointers\n"); 20885 return -EINVAL; 20886 } 20887 } 20888 20889 return 0; 20890 } 20891 20892 enum { 20893 PROCESS_BPF_EXIT = 1 20894 }; 20895 20896 static int process_bpf_exit_full(struct bpf_verifier_env *env, 20897 bool *do_print_state, 20898 bool exception_exit) 20899 { 20900 /* We must do check_reference_leak here before 20901 * prepare_func_exit to handle the case when 20902 * state->curframe > 0, it may be a callback function, 20903 * for which reference_state must match caller reference 20904 * state when it exits. 20905 */ 20906 int err = check_resource_leak(env, exception_exit, 20907 !env->cur_state->curframe, 20908 "BPF_EXIT instruction in main prog"); 20909 if (err) 20910 return err; 20911 20912 /* The side effect of the prepare_func_exit which is 20913 * being skipped is that it frees bpf_func_state. 20914 * Typically, process_bpf_exit will only be hit with 20915 * outermost exit. copy_verifier_state in pop_stack will 20916 * handle freeing of any extra bpf_func_state left over 20917 * from not processing all nested function exits. 
We 20918 * also skip return code checks as they are not needed 20919 * for exceptional exits. 20920 */ 20921 if (exception_exit) 20922 return PROCESS_BPF_EXIT; 20923 20924 if (env->cur_state->curframe) { 20925 /* exit from nested function */ 20926 err = prepare_func_exit(env, &env->insn_idx); 20927 if (err) 20928 return err; 20929 *do_print_state = true; 20930 return 0; 20931 } 20932 20933 err = check_return_code(env, BPF_REG_0, "R0"); 20934 if (err) 20935 return err; 20936 return PROCESS_BPF_EXIT; 20937 } 20938 20939 static int indirect_jump_min_max_index(struct bpf_verifier_env *env, 20940 int regno, 20941 struct bpf_map *map, 20942 u32 *pmin_index, u32 *pmax_index) 20943 { 20944 struct bpf_reg_state *reg = reg_state(env, regno); 20945 u64 min_index, max_index; 20946 const u32 size = 8; 20947 20948 if (check_add_overflow(reg->umin_value, reg->off, &min_index) || 20949 (min_index > (u64) U32_MAX * size)) { 20950 verbose(env, "the sum of R%u umin_value %llu and off %u is too big\n", 20951 regno, reg->umin_value, reg->off); 20952 return -ERANGE; 20953 } 20954 if (check_add_overflow(reg->umax_value, reg->off, &max_index) || 20955 (max_index > (u64) U32_MAX * size)) { 20956 verbose(env, "the sum of R%u umax_value %llu and off %u is too big\n", 20957 regno, reg->umax_value, reg->off); 20958 return -ERANGE; 20959 } 20960 20961 min_index /= size; 20962 max_index /= size; 20963 20964 if (max_index >= map->max_entries) { 20965 verbose(env, "R%u points to outside of jump table: [%llu,%llu] max_entries %u\n", 20966 regno, min_index, max_index, map->max_entries); 20967 return -EINVAL; 20968 } 20969 20970 *pmin_index = min_index; 20971 *pmax_index = max_index; 20972 return 0; 20973 } 20974 20975 /* gotox *dst_reg */ 20976 static int check_indirect_jump(struct bpf_verifier_env *env, struct bpf_insn *insn) 20977 { 20978 struct bpf_verifier_state *other_branch; 20979 struct bpf_reg_state *dst_reg; 20980 struct bpf_map *map; 20981 u32 min_index, max_index; 20982 int err = 0; 20983 
int n; 20984 int i; 20985 20986 dst_reg = reg_state(env, insn->dst_reg); 20987 if (dst_reg->type != PTR_TO_INSN) { 20988 verbose(env, "R%d has type %s, expected PTR_TO_INSN\n", 20989 insn->dst_reg, reg_type_str(env, dst_reg->type)); 20990 return -EINVAL; 20991 } 20992 20993 map = dst_reg->map_ptr; 20994 if (verifier_bug_if(!map, env, "R%d has an empty map pointer", insn->dst_reg)) 20995 return -EFAULT; 20996 20997 if (verifier_bug_if(map->map_type != BPF_MAP_TYPE_INSN_ARRAY, env, 20998 "R%d has incorrect map type %d", insn->dst_reg, map->map_type)) 20999 return -EFAULT; 21000 21001 err = indirect_jump_min_max_index(env, insn->dst_reg, map, &min_index, &max_index); 21002 if (err) 21003 return err; 21004 21005 /* Ensure that the buffer is large enough */ 21006 if (!env->gotox_tmp_buf || env->gotox_tmp_buf->cnt < max_index - min_index + 1) { 21007 env->gotox_tmp_buf = iarray_realloc(env->gotox_tmp_buf, 21008 max_index - min_index + 1); 21009 if (!env->gotox_tmp_buf) 21010 return -ENOMEM; 21011 } 21012 21013 n = copy_insn_array_uniq(map, min_index, max_index, env->gotox_tmp_buf->items); 21014 if (n < 0) 21015 return n; 21016 if (n == 0) { 21017 verbose(env, "register R%d doesn't point to any offset in map id=%d\n", 21018 insn->dst_reg, map->id); 21019 return -EINVAL; 21020 } 21021 21022 for (i = 0; i < n - 1; i++) { 21023 other_branch = push_stack(env, env->gotox_tmp_buf->items[i], 21024 env->insn_idx, env->cur_state->speculative); 21025 if (IS_ERR(other_branch)) 21026 return PTR_ERR(other_branch); 21027 } 21028 env->insn_idx = env->gotox_tmp_buf->items[n-1]; 21029 return 0; 21030 } 21031 21032 static int do_check_insn(struct bpf_verifier_env *env, bool *do_print_state) 21033 { 21034 int err; 21035 struct bpf_insn *insn = &env->prog->insnsi[env->insn_idx]; 21036 u8 class = BPF_CLASS(insn->code); 21037 21038 if (class == BPF_ALU || class == BPF_ALU64) { 21039 err = check_alu_op(env, insn); 21040 if (err) 21041 return err; 21042 21043 } else if (class == BPF_LDX) { 21044 
bool is_ldsx = BPF_MODE(insn->code) == BPF_MEMSX; 21045 21046 /* Check for reserved fields is already done in 21047 * resolve_pseudo_ldimm64(). 21048 */ 21049 err = check_load_mem(env, insn, false, is_ldsx, true, "ldx"); 21050 if (err) 21051 return err; 21052 } else if (class == BPF_STX) { 21053 if (BPF_MODE(insn->code) == BPF_ATOMIC) { 21054 err = check_atomic(env, insn); 21055 if (err) 21056 return err; 21057 env->insn_idx++; 21058 return 0; 21059 } 21060 21061 if (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0) { 21062 verbose(env, "BPF_STX uses reserved fields\n"); 21063 return -EINVAL; 21064 } 21065 21066 err = check_store_reg(env, insn, false); 21067 if (err) 21068 return err; 21069 } else if (class == BPF_ST) { 21070 enum bpf_reg_type dst_reg_type; 21071 21072 if (BPF_MODE(insn->code) != BPF_MEM || 21073 insn->src_reg != BPF_REG_0) { 21074 verbose(env, "BPF_ST uses reserved fields\n"); 21075 return -EINVAL; 21076 } 21077 /* check src operand */ 21078 err = check_reg_arg(env, insn->dst_reg, SRC_OP); 21079 if (err) 21080 return err; 21081 21082 dst_reg_type = cur_regs(env)[insn->dst_reg].type; 21083 21084 /* check that memory (dst_reg + off) is writeable */ 21085 err = check_mem_access(env, env->insn_idx, insn->dst_reg, 21086 insn->off, BPF_SIZE(insn->code), 21087 BPF_WRITE, -1, false, false); 21088 if (err) 21089 return err; 21090 21091 err = save_aux_ptr_type(env, dst_reg_type, false); 21092 if (err) 21093 return err; 21094 } else if (class == BPF_JMP || class == BPF_JMP32) { 21095 u8 opcode = BPF_OP(insn->code); 21096 21097 env->jmps_processed++; 21098 if (opcode == BPF_CALL) { 21099 if (BPF_SRC(insn->code) != BPF_K || 21100 (insn->src_reg != BPF_PSEUDO_KFUNC_CALL && 21101 insn->off != 0) || 21102 (insn->src_reg != BPF_REG_0 && 21103 insn->src_reg != BPF_PSEUDO_CALL && 21104 insn->src_reg != BPF_PSEUDO_KFUNC_CALL) || 21105 insn->dst_reg != BPF_REG_0 || class == BPF_JMP32) { 21106 verbose(env, "BPF_CALL uses reserved fields\n"); 21107 return -EINVAL; 
21108 } 21109 21110 if (env->cur_state->active_locks) { 21111 if ((insn->src_reg == BPF_REG_0 && 21112 insn->imm != BPF_FUNC_spin_unlock) || 21113 (insn->src_reg == BPF_PSEUDO_KFUNC_CALL && 21114 (insn->off != 0 || !kfunc_spin_allowed(insn->imm)))) { 21115 verbose(env, 21116 "function calls are not allowed while holding a lock\n"); 21117 return -EINVAL; 21118 } 21119 } 21120 if (insn->src_reg == BPF_PSEUDO_CALL) { 21121 err = check_func_call(env, insn, &env->insn_idx); 21122 } else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) { 21123 err = check_kfunc_call(env, insn, &env->insn_idx); 21124 if (!err && is_bpf_throw_kfunc(insn)) 21125 return process_bpf_exit_full(env, do_print_state, true); 21126 } else { 21127 err = check_helper_call(env, insn, &env->insn_idx); 21128 } 21129 if (err) 21130 return err; 21131 21132 mark_reg_scratched(env, BPF_REG_0); 21133 } else if (opcode == BPF_JA) { 21134 if (BPF_SRC(insn->code) == BPF_X) { 21135 if (insn->src_reg != BPF_REG_0 || 21136 insn->imm != 0 || insn->off != 0) { 21137 verbose(env, "BPF_JA|BPF_X uses reserved fields\n"); 21138 return -EINVAL; 21139 } 21140 return check_indirect_jump(env, insn); 21141 } 21142 21143 if (BPF_SRC(insn->code) != BPF_K || 21144 insn->src_reg != BPF_REG_0 || 21145 insn->dst_reg != BPF_REG_0 || 21146 (class == BPF_JMP && insn->imm != 0) || 21147 (class == BPF_JMP32 && insn->off != 0)) { 21148 verbose(env, "BPF_JA uses reserved fields\n"); 21149 return -EINVAL; 21150 } 21151 21152 if (class == BPF_JMP) 21153 env->insn_idx += insn->off + 1; 21154 else 21155 env->insn_idx += insn->imm + 1; 21156 return 0; 21157 } else if (opcode == BPF_EXIT) { 21158 if (BPF_SRC(insn->code) != BPF_K || 21159 insn->imm != 0 || 21160 insn->src_reg != BPF_REG_0 || 21161 insn->dst_reg != BPF_REG_0 || 21162 class == BPF_JMP32) { 21163 verbose(env, "BPF_EXIT uses reserved fields\n"); 21164 return -EINVAL; 21165 } 21166 return process_bpf_exit_full(env, do_print_state, false); 21167 } else { 21168 err = check_cond_jmp_op(env, 
insn, &env->insn_idx); 21169 if (err) 21170 return err; 21171 } 21172 } else if (class == BPF_LD) { 21173 u8 mode = BPF_MODE(insn->code); 21174 21175 if (mode == BPF_ABS || mode == BPF_IND) { 21176 err = check_ld_abs(env, insn); 21177 if (err) 21178 return err; 21179 21180 } else if (mode == BPF_IMM) { 21181 err = check_ld_imm(env, insn); 21182 if (err) 21183 return err; 21184 21185 env->insn_idx++; 21186 sanitize_mark_insn_seen(env); 21187 } else { 21188 verbose(env, "invalid BPF_LD mode\n"); 21189 return -EINVAL; 21190 } 21191 } else { 21192 verbose(env, "unknown insn class %d\n", class); 21193 return -EINVAL; 21194 } 21195 21196 env->insn_idx++; 21197 return 0; 21198 } 21199 21200 static int do_check(struct bpf_verifier_env *env) 21201 { 21202 bool pop_log = !(env->log.level & BPF_LOG_LEVEL2); 21203 struct bpf_verifier_state *state = env->cur_state; 21204 struct bpf_insn *insns = env->prog->insnsi; 21205 int insn_cnt = env->prog->len; 21206 bool do_print_state = false; 21207 int prev_insn_idx = -1; 21208 21209 for (;;) { 21210 struct bpf_insn *insn; 21211 struct bpf_insn_aux_data *insn_aux; 21212 int err, marks_err; 21213 21214 /* reset current history entry on each new instruction */ 21215 env->cur_hist_ent = NULL; 21216 21217 env->prev_insn_idx = prev_insn_idx; 21218 if (env->insn_idx >= insn_cnt) { 21219 verbose(env, "invalid insn idx %d insn_cnt %d\n", 21220 env->insn_idx, insn_cnt); 21221 return -EFAULT; 21222 } 21223 21224 insn = &insns[env->insn_idx]; 21225 insn_aux = &env->insn_aux_data[env->insn_idx]; 21226 21227 if (++env->insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) { 21228 verbose(env, 21229 "BPF program is too large. 
Processed %d insn\n", 21230 env->insn_processed); 21231 return -E2BIG; 21232 } 21233 21234 state->last_insn_idx = env->prev_insn_idx; 21235 state->insn_idx = env->insn_idx; 21236 21237 if (is_prune_point(env, env->insn_idx)) { 21238 err = is_state_visited(env, env->insn_idx); 21239 if (err < 0) 21240 return err; 21241 if (err == 1) { 21242 /* found equivalent state, can prune the search */ 21243 if (env->log.level & BPF_LOG_LEVEL) { 21244 if (do_print_state) 21245 verbose(env, "\nfrom %d to %d%s: safe\n", 21246 env->prev_insn_idx, env->insn_idx, 21247 env->cur_state->speculative ? 21248 " (speculative execution)" : ""); 21249 else 21250 verbose(env, "%d: safe\n", env->insn_idx); 21251 } 21252 goto process_bpf_exit; 21253 } 21254 } 21255 21256 if (is_jmp_point(env, env->insn_idx)) { 21257 err = push_jmp_history(env, state, 0, 0); 21258 if (err) 21259 return err; 21260 } 21261 21262 if (signal_pending(current)) 21263 return -EAGAIN; 21264 21265 if (need_resched()) 21266 cond_resched(); 21267 21268 if (env->log.level & BPF_LOG_LEVEL2 && do_print_state) { 21269 verbose(env, "\nfrom %d to %d%s:", 21270 env->prev_insn_idx, env->insn_idx, 21271 env->cur_state->speculative ? 
21272 " (speculative execution)" : ""); 21273 print_verifier_state(env, state, state->curframe, true); 21274 do_print_state = false; 21275 } 21276 21277 if (env->log.level & BPF_LOG_LEVEL) { 21278 if (verifier_state_scratched(env)) 21279 print_insn_state(env, state, state->curframe); 21280 21281 verbose_linfo(env, env->insn_idx, "; "); 21282 env->prev_log_pos = env->log.end_pos; 21283 verbose(env, "%d: ", env->insn_idx); 21284 verbose_insn(env, insn); 21285 env->prev_insn_print_pos = env->log.end_pos - env->prev_log_pos; 21286 env->prev_log_pos = env->log.end_pos; 21287 } 21288 21289 if (bpf_prog_is_offloaded(env->prog->aux)) { 21290 err = bpf_prog_offload_verify_insn(env, env->insn_idx, 21291 env->prev_insn_idx); 21292 if (err) 21293 return err; 21294 } 21295 21296 sanitize_mark_insn_seen(env); 21297 prev_insn_idx = env->insn_idx; 21298 21299 /* Reduce verification complexity by stopping speculative path 21300 * verification when a nospec is encountered. 21301 */ 21302 if (state->speculative && insn_aux->nospec) 21303 goto process_bpf_exit; 21304 21305 err = bpf_reset_stack_write_marks(env, env->insn_idx); 21306 if (err) 21307 return err; 21308 err = do_check_insn(env, &do_print_state); 21309 if (err >= 0 || error_recoverable_with_nospec(err)) { 21310 marks_err = bpf_commit_stack_write_marks(env); 21311 if (marks_err) 21312 return marks_err; 21313 } 21314 if (error_recoverable_with_nospec(err) && state->speculative) { 21315 /* Prevent this speculative path from ever reaching the 21316 * insn that would have been unsafe to execute. 21317 */ 21318 insn_aux->nospec = true; 21319 /* If it was an ADD/SUB insn, potentially remove any 21320 * markings for alu sanitization. 
21321 */ 21322 insn_aux->alu_state = 0; 21323 goto process_bpf_exit; 21324 } else if (err < 0) { 21325 return err; 21326 } else if (err == PROCESS_BPF_EXIT) { 21327 goto process_bpf_exit; 21328 } 21329 WARN_ON_ONCE(err); 21330 21331 if (state->speculative && insn_aux->nospec_result) { 21332 /* If we are on a path that performed a jump-op, this 21333 * may skip a nospec patched-in after the jump. This can 21334 * currently never happen because nospec_result is only 21335 * used for the write-ops 21336 * `*(size*)(dst_reg+off)=src_reg|imm32` and helper 21337 * calls. These must never skip the following insn 21338 * (i.e., bpf_insn_successors()'s opcode_info.can_jump 21339 * is false). Still, add a warning to document this in 21340 * case nospec_result is used elsewhere in the future. 21341 * 21342 * All non-branch instructions have a single 21343 * fall-through edge. For these, nospec_result should 21344 * already work. 21345 */ 21346 if (verifier_bug_if((BPF_CLASS(insn->code) == BPF_JMP || 21347 BPF_CLASS(insn->code) == BPF_JMP32) && 21348 BPF_OP(insn->code) != BPF_CALL, env, 21349 "speculation barrier after jump instruction may not have the desired effect")) 21350 return -EFAULT; 21351 process_bpf_exit: 21352 mark_verifier_state_scratched(env); 21353 err = update_branch_counts(env, env->cur_state); 21354 if (err) 21355 return err; 21356 err = bpf_update_live_stack(env); 21357 if (err) 21358 return err; 21359 err = pop_stack(env, &prev_insn_idx, &env->insn_idx, 21360 pop_log); 21361 if (err < 0) { 21362 if (err != -ENOENT) 21363 return err; 21364 break; 21365 } else { 21366 do_print_state = true; 21367 continue; 21368 } 21369 } 21370 } 21371 21372 return 0; 21373 } 21374 21375 static int find_btf_percpu_datasec(struct btf *btf) 21376 { 21377 const struct btf_type *t; 21378 const char *tname; 21379 int i, n; 21380 21381 /* 21382 * Both vmlinux and module each have their own ".data..percpu" 21383 * DATASECs in BTF. 
So for module's case, we need to skip vmlinux BTF 21384 * types to look at only module's own BTF types. 21385 */ 21386 n = btf_nr_types(btf); 21387 for (i = btf_named_start_id(btf, true); i < n; i++) { 21388 t = btf_type_by_id(btf, i); 21389 if (BTF_INFO_KIND(t->info) != BTF_KIND_DATASEC) 21390 continue; 21391 21392 tname = btf_name_by_offset(btf, t->name_off); 21393 if (!strcmp(tname, ".data..percpu")) 21394 return i; 21395 } 21396 21397 return -ENOENT; 21398 } 21399 21400 /* 21401 * Add btf to the env->used_btfs array. If needed, refcount the 21402 * corresponding kernel module. To simplify caller's logic 21403 * in case of error or if btf was added before the function 21404 * decreases the btf refcount. 21405 */ 21406 static int __add_used_btf(struct bpf_verifier_env *env, struct btf *btf) 21407 { 21408 struct btf_mod_pair *btf_mod; 21409 int ret = 0; 21410 int i; 21411 21412 /* check whether we recorded this BTF (and maybe module) already */ 21413 for (i = 0; i < env->used_btf_cnt; i++) 21414 if (env->used_btfs[i].btf == btf) 21415 goto ret_put; 21416 21417 if (env->used_btf_cnt >= MAX_USED_BTFS) { 21418 verbose(env, "The total number of btfs per program has reached the limit of %u\n", 21419 MAX_USED_BTFS); 21420 ret = -E2BIG; 21421 goto ret_put; 21422 } 21423 21424 btf_mod = &env->used_btfs[env->used_btf_cnt]; 21425 btf_mod->btf = btf; 21426 btf_mod->module = NULL; 21427 21428 /* if we reference variables from kernel module, bump its refcount */ 21429 if (btf_is_module(btf)) { 21430 btf_mod->module = btf_try_get_module(btf); 21431 if (!btf_mod->module) { 21432 ret = -ENXIO; 21433 goto ret_put; 21434 } 21435 } 21436 21437 env->used_btf_cnt++; 21438 return 0; 21439 21440 ret_put: 21441 /* Either error or this BTF was already added */ 21442 btf_put(btf); 21443 return ret; 21444 } 21445 21446 /* replace pseudo btf_id with kernel symbol address */ 21447 static int __check_pseudo_btf_id(struct bpf_verifier_env *env, 21448 struct bpf_insn *insn, 21449 struct 
bpf_insn_aux_data *aux, 21450 struct btf *btf) 21451 { 21452 const struct btf_var_secinfo *vsi; 21453 const struct btf_type *datasec; 21454 const struct btf_type *t; 21455 const char *sym_name; 21456 bool percpu = false; 21457 u32 type, id = insn->imm; 21458 s32 datasec_id; 21459 u64 addr; 21460 int i; 21461 21462 t = btf_type_by_id(btf, id); 21463 if (!t) { 21464 verbose(env, "ldimm64 insn specifies invalid btf_id %d.\n", id); 21465 return -ENOENT; 21466 } 21467 21468 if (!btf_type_is_var(t) && !btf_type_is_func(t)) { 21469 verbose(env, "pseudo btf_id %d in ldimm64 isn't KIND_VAR or KIND_FUNC\n", id); 21470 return -EINVAL; 21471 } 21472 21473 sym_name = btf_name_by_offset(btf, t->name_off); 21474 addr = kallsyms_lookup_name(sym_name); 21475 if (!addr) { 21476 verbose(env, "ldimm64 failed to find the address for kernel symbol '%s'.\n", 21477 sym_name); 21478 return -ENOENT; 21479 } 21480 insn[0].imm = (u32)addr; 21481 insn[1].imm = addr >> 32; 21482 21483 if (btf_type_is_func(t)) { 21484 aux->btf_var.reg_type = PTR_TO_MEM | MEM_RDONLY; 21485 aux->btf_var.mem_size = 0; 21486 return 0; 21487 } 21488 21489 datasec_id = find_btf_percpu_datasec(btf); 21490 if (datasec_id > 0) { 21491 datasec = btf_type_by_id(btf, datasec_id); 21492 for_each_vsi(i, datasec, vsi) { 21493 if (vsi->type == id) { 21494 percpu = true; 21495 break; 21496 } 21497 } 21498 } 21499 21500 type = t->type; 21501 t = btf_type_skip_modifiers(btf, type, NULL); 21502 if (percpu) { 21503 aux->btf_var.reg_type = PTR_TO_BTF_ID | MEM_PERCPU; 21504 aux->btf_var.btf = btf; 21505 aux->btf_var.btf_id = type; 21506 } else if (!btf_type_is_struct(t)) { 21507 const struct btf_type *ret; 21508 const char *tname; 21509 u32 tsize; 21510 21511 /* resolve the type size of ksym. 
*/ 21512 ret = btf_resolve_size(btf, t, &tsize); 21513 if (IS_ERR(ret)) { 21514 tname = btf_name_by_offset(btf, t->name_off); 21515 verbose(env, "ldimm64 unable to resolve the size of type '%s': %ld\n", 21516 tname, PTR_ERR(ret)); 21517 return -EINVAL; 21518 } 21519 aux->btf_var.reg_type = PTR_TO_MEM | MEM_RDONLY; 21520 aux->btf_var.mem_size = tsize; 21521 } else { 21522 aux->btf_var.reg_type = PTR_TO_BTF_ID; 21523 aux->btf_var.btf = btf; 21524 aux->btf_var.btf_id = type; 21525 } 21526 21527 return 0; 21528 } 21529 21530 static int check_pseudo_btf_id(struct bpf_verifier_env *env, 21531 struct bpf_insn *insn, 21532 struct bpf_insn_aux_data *aux) 21533 { 21534 struct btf *btf; 21535 int btf_fd; 21536 int err; 21537 21538 btf_fd = insn[1].imm; 21539 if (btf_fd) { 21540 btf = btf_get_by_fd(btf_fd); 21541 if (IS_ERR(btf)) { 21542 verbose(env, "invalid module BTF object FD specified.\n"); 21543 return -EINVAL; 21544 } 21545 } else { 21546 if (!btf_vmlinux) { 21547 verbose(env, "kernel is missing BTF, make sure CONFIG_DEBUG_INFO_BTF=y is specified in Kconfig.\n"); 21548 return -EINVAL; 21549 } 21550 btf_get(btf_vmlinux); 21551 btf = btf_vmlinux; 21552 } 21553 21554 err = __check_pseudo_btf_id(env, insn, aux, btf); 21555 if (err) { 21556 btf_put(btf); 21557 return err; 21558 } 21559 21560 return __add_used_btf(env, btf); 21561 } 21562 21563 static bool is_tracing_prog_type(enum bpf_prog_type type) 21564 { 21565 switch (type) { 21566 case BPF_PROG_TYPE_KPROBE: 21567 case BPF_PROG_TYPE_TRACEPOINT: 21568 case BPF_PROG_TYPE_PERF_EVENT: 21569 case BPF_PROG_TYPE_RAW_TRACEPOINT: 21570 case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE: 21571 return true; 21572 default: 21573 return false; 21574 } 21575 } 21576 21577 static bool bpf_map_is_cgroup_storage(struct bpf_map *map) 21578 { 21579 return (map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE || 21580 map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE); 21581 } 21582 21583 static int check_map_prog_compatibility(struct bpf_verifier_env 
*env, 21584 struct bpf_map *map, 21585 struct bpf_prog *prog) 21586 21587 { 21588 enum bpf_prog_type prog_type = resolve_prog_type(prog); 21589 21590 if (map->excl_prog_sha && 21591 memcmp(map->excl_prog_sha, prog->digest, SHA256_DIGEST_SIZE)) { 21592 verbose(env, "program's hash doesn't match map's excl_prog_hash\n"); 21593 return -EACCES; 21594 } 21595 21596 if (btf_record_has_field(map->record, BPF_LIST_HEAD) || 21597 btf_record_has_field(map->record, BPF_RB_ROOT)) { 21598 if (is_tracing_prog_type(prog_type)) { 21599 verbose(env, "tracing progs cannot use bpf_{list_head,rb_root} yet\n"); 21600 return -EINVAL; 21601 } 21602 } 21603 21604 if (btf_record_has_field(map->record, BPF_SPIN_LOCK | BPF_RES_SPIN_LOCK)) { 21605 if (prog_type == BPF_PROG_TYPE_SOCKET_FILTER) { 21606 verbose(env, "socket filter progs cannot use bpf_spin_lock yet\n"); 21607 return -EINVAL; 21608 } 21609 21610 if (is_tracing_prog_type(prog_type)) { 21611 verbose(env, "tracing progs cannot use bpf_spin_lock yet\n"); 21612 return -EINVAL; 21613 } 21614 } 21615 21616 if ((bpf_prog_is_offloaded(prog->aux) || bpf_map_is_offloaded(map)) && 21617 !bpf_offload_prog_map_match(prog, map)) { 21618 verbose(env, "offload device mismatch between prog and map\n"); 21619 return -EINVAL; 21620 } 21621 21622 if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) { 21623 verbose(env, "bpf_struct_ops map cannot be used in prog\n"); 21624 return -EINVAL; 21625 } 21626 21627 if (prog->sleepable) 21628 switch (map->map_type) { 21629 case BPF_MAP_TYPE_HASH: 21630 case BPF_MAP_TYPE_LRU_HASH: 21631 case BPF_MAP_TYPE_ARRAY: 21632 case BPF_MAP_TYPE_PERCPU_HASH: 21633 case BPF_MAP_TYPE_PERCPU_ARRAY: 21634 case BPF_MAP_TYPE_LRU_PERCPU_HASH: 21635 case BPF_MAP_TYPE_ARRAY_OF_MAPS: 21636 case BPF_MAP_TYPE_HASH_OF_MAPS: 21637 case BPF_MAP_TYPE_RINGBUF: 21638 case BPF_MAP_TYPE_USER_RINGBUF: 21639 case BPF_MAP_TYPE_INODE_STORAGE: 21640 case BPF_MAP_TYPE_SK_STORAGE: 21641 case BPF_MAP_TYPE_TASK_STORAGE: 21642 case 
BPF_MAP_TYPE_CGRP_STORAGE: 21643 case BPF_MAP_TYPE_QUEUE: 21644 case BPF_MAP_TYPE_STACK: 21645 case BPF_MAP_TYPE_ARENA: 21646 case BPF_MAP_TYPE_INSN_ARRAY: 21647 case BPF_MAP_TYPE_PROG_ARRAY: 21648 break; 21649 default: 21650 verbose(env, 21651 "Sleepable programs can only use array, hash, ringbuf and local storage maps\n"); 21652 return -EINVAL; 21653 } 21654 21655 if (bpf_map_is_cgroup_storage(map) && 21656 bpf_cgroup_storage_assign(env->prog->aux, map)) { 21657 verbose(env, "only one cgroup storage of each type is allowed\n"); 21658 return -EBUSY; 21659 } 21660 21661 if (map->map_type == BPF_MAP_TYPE_ARENA) { 21662 if (env->prog->aux->arena) { 21663 verbose(env, "Only one arena per program\n"); 21664 return -EBUSY; 21665 } 21666 if (!env->allow_ptr_leaks || !env->bpf_capable) { 21667 verbose(env, "CAP_BPF and CAP_PERFMON are required to use arena\n"); 21668 return -EPERM; 21669 } 21670 if (!env->prog->jit_requested) { 21671 verbose(env, "JIT is required to use arena\n"); 21672 return -EOPNOTSUPP; 21673 } 21674 if (!bpf_jit_supports_arena()) { 21675 verbose(env, "JIT doesn't support arena\n"); 21676 return -EOPNOTSUPP; 21677 } 21678 env->prog->aux->arena = (void *)map; 21679 if (!bpf_arena_get_user_vm_start(env->prog->aux->arena)) { 21680 verbose(env, "arena's user address must be set via map_extra or mmap()\n"); 21681 return -EINVAL; 21682 } 21683 } 21684 21685 return 0; 21686 } 21687 21688 static int __add_used_map(struct bpf_verifier_env *env, struct bpf_map *map) 21689 { 21690 int i, err; 21691 21692 /* check whether we recorded this map already */ 21693 for (i = 0; i < env->used_map_cnt; i++) 21694 if (env->used_maps[i] == map) 21695 return i; 21696 21697 if (env->used_map_cnt >= MAX_USED_MAPS) { 21698 verbose(env, "The total number of maps per program has reached the limit of %u\n", 21699 MAX_USED_MAPS); 21700 return -E2BIG; 21701 } 21702 21703 err = check_map_prog_compatibility(env, map, env->prog); 21704 if (err) 21705 return err; 21706 21707 if 
(env->prog->sleepable) 21708 atomic64_inc(&map->sleepable_refcnt); 21709 21710 /* hold the map. If the program is rejected by verifier, 21711 * the map will be released by release_maps() or it 21712 * will be used by the valid program until it's unloaded 21713 * and all maps are released in bpf_free_used_maps() 21714 */ 21715 bpf_map_inc(map); 21716 21717 env->used_maps[env->used_map_cnt++] = map; 21718 21719 if (map->map_type == BPF_MAP_TYPE_INSN_ARRAY) { 21720 err = bpf_insn_array_init(map, env->prog); 21721 if (err) { 21722 verbose(env, "Failed to properly initialize insn array\n"); 21723 return err; 21724 } 21725 env->insn_array_maps[env->insn_array_map_cnt++] = map; 21726 } 21727 21728 return env->used_map_cnt - 1; 21729 } 21730 21731 /* Add map behind fd to used maps list, if it's not already there, and return 21732 * its index. 21733 * Returns <0 on error, or >= 0 index, on success. 21734 */ 21735 static int add_used_map(struct bpf_verifier_env *env, int fd) 21736 { 21737 struct bpf_map *map; 21738 CLASS(fd, f)(fd); 21739 21740 map = __bpf_map_get(f); 21741 if (IS_ERR(map)) { 21742 verbose(env, "fd %d is not pointing to valid bpf_map\n", fd); 21743 return PTR_ERR(map); 21744 } 21745 21746 return __add_used_map(env, map); 21747 } 21748 21749 /* find and rewrite pseudo imm in ld_imm64 instructions: 21750 * 21751 * 1. if it accesses map FD, replace it with actual map pointer. 21752 * 2. if it accesses btf_id of a VAR, replace it with pointer to the var. 21753 * 21754 * NOTE: btf_vmlinux is required for converting pseudo btf_id. 
21755 */ 21756 static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env) 21757 { 21758 struct bpf_insn *insn = env->prog->insnsi; 21759 int insn_cnt = env->prog->len; 21760 int i, err; 21761 21762 err = bpf_prog_calc_tag(env->prog); 21763 if (err) 21764 return err; 21765 21766 for (i = 0; i < insn_cnt; i++, insn++) { 21767 if (BPF_CLASS(insn->code) == BPF_LDX && 21768 ((BPF_MODE(insn->code) != BPF_MEM && BPF_MODE(insn->code) != BPF_MEMSX) || 21769 insn->imm != 0)) { 21770 verbose(env, "BPF_LDX uses reserved fields\n"); 21771 return -EINVAL; 21772 } 21773 21774 if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) { 21775 struct bpf_insn_aux_data *aux; 21776 struct bpf_map *map; 21777 int map_idx; 21778 u64 addr; 21779 u32 fd; 21780 21781 if (i == insn_cnt - 1 || insn[1].code != 0 || 21782 insn[1].dst_reg != 0 || insn[1].src_reg != 0 || 21783 insn[1].off != 0) { 21784 verbose(env, "invalid bpf_ld_imm64 insn\n"); 21785 return -EINVAL; 21786 } 21787 21788 if (insn[0].src_reg == 0) 21789 /* valid generic load 64-bit imm */ 21790 goto next_insn; 21791 21792 if (insn[0].src_reg == BPF_PSEUDO_BTF_ID) { 21793 aux = &env->insn_aux_data[i]; 21794 err = check_pseudo_btf_id(env, insn, aux); 21795 if (err) 21796 return err; 21797 goto next_insn; 21798 } 21799 21800 if (insn[0].src_reg == BPF_PSEUDO_FUNC) { 21801 aux = &env->insn_aux_data[i]; 21802 aux->ptr_type = PTR_TO_FUNC; 21803 goto next_insn; 21804 } 21805 21806 /* In final convert_pseudo_ld_imm64() step, this is 21807 * converted into regular 64-bit imm load insn. 
21808 */ 21809 switch (insn[0].src_reg) { 21810 case BPF_PSEUDO_MAP_VALUE: 21811 case BPF_PSEUDO_MAP_IDX_VALUE: 21812 break; 21813 case BPF_PSEUDO_MAP_FD: 21814 case BPF_PSEUDO_MAP_IDX: 21815 if (insn[1].imm == 0) 21816 break; 21817 fallthrough; 21818 default: 21819 verbose(env, "unrecognized bpf_ld_imm64 insn\n"); 21820 return -EINVAL; 21821 } 21822 21823 switch (insn[0].src_reg) { 21824 case BPF_PSEUDO_MAP_IDX_VALUE: 21825 case BPF_PSEUDO_MAP_IDX: 21826 if (bpfptr_is_null(env->fd_array)) { 21827 verbose(env, "fd_idx without fd_array is invalid\n"); 21828 return -EPROTO; 21829 } 21830 if (copy_from_bpfptr_offset(&fd, env->fd_array, 21831 insn[0].imm * sizeof(fd), 21832 sizeof(fd))) 21833 return -EFAULT; 21834 break; 21835 default: 21836 fd = insn[0].imm; 21837 break; 21838 } 21839 21840 map_idx = add_used_map(env, fd); 21841 if (map_idx < 0) 21842 return map_idx; 21843 map = env->used_maps[map_idx]; 21844 21845 aux = &env->insn_aux_data[i]; 21846 aux->map_index = map_idx; 21847 21848 if (insn[0].src_reg == BPF_PSEUDO_MAP_FD || 21849 insn[0].src_reg == BPF_PSEUDO_MAP_IDX) { 21850 addr = (unsigned long)map; 21851 } else { 21852 u32 off = insn[1].imm; 21853 21854 if (!map->ops->map_direct_value_addr) { 21855 verbose(env, "no direct value access support for this map type\n"); 21856 return -EINVAL; 21857 } 21858 21859 err = map->ops->map_direct_value_addr(map, &addr, off); 21860 if (err) { 21861 verbose(env, "invalid access to map value pointer, value_size=%u off=%u\n", 21862 map->value_size, off); 21863 return err; 21864 } 21865 21866 aux->map_off = off; 21867 addr += off; 21868 } 21869 21870 insn[0].imm = (u32)addr; 21871 insn[1].imm = addr >> 32; 21872 21873 next_insn: 21874 insn++; 21875 i++; 21876 continue; 21877 } 21878 21879 /* Basic sanity check before we invest more work here. 
*/ 21880 if (!bpf_opcode_in_insntable(insn->code)) { 21881 verbose(env, "unknown opcode %02x\n", insn->code); 21882 return -EINVAL; 21883 } 21884 } 21885 21886 /* now all pseudo BPF_LD_IMM64 instructions load valid 21887 * 'struct bpf_map *' into a register instead of user map_fd. 21888 * These pointers will be used later by verifier to validate map access. 21889 */ 21890 return 0; 21891 } 21892 21893 /* drop refcnt of maps used by the rejected program */ 21894 static void release_maps(struct bpf_verifier_env *env) 21895 { 21896 __bpf_free_used_maps(env->prog->aux, env->used_maps, 21897 env->used_map_cnt); 21898 } 21899 21900 /* drop refcnt of maps used by the rejected program */ 21901 static void release_btfs(struct bpf_verifier_env *env) 21902 { 21903 __bpf_free_used_btfs(env->used_btfs, env->used_btf_cnt); 21904 } 21905 21906 /* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */ 21907 static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env) 21908 { 21909 struct bpf_insn *insn = env->prog->insnsi; 21910 int insn_cnt = env->prog->len; 21911 int i; 21912 21913 for (i = 0; i < insn_cnt; i++, insn++) { 21914 if (insn->code != (BPF_LD | BPF_IMM | BPF_DW)) 21915 continue; 21916 if (insn->src_reg == BPF_PSEUDO_FUNC) 21917 continue; 21918 insn->src_reg = 0; 21919 } 21920 } 21921 21922 /* single env->prog->insni[off] instruction was replaced with the range 21923 * insni[off, off + cnt). Adjust corresponding insn_aux_data by copying 21924 * [0, off) and [off, end) to new locations, so the patched range stays zero 21925 */ 21926 static void adjust_insn_aux_data(struct bpf_verifier_env *env, 21927 struct bpf_prog *new_prog, u32 off, u32 cnt) 21928 { 21929 struct bpf_insn_aux_data *data = env->insn_aux_data; 21930 struct bpf_insn *insn = new_prog->insnsi; 21931 u32 old_seen = data[off].seen; 21932 u32 prog_len; 21933 int i; 21934 21935 /* aux info at OFF always needs adjustment, no matter fast path 21936 * (cnt == 1) is taken or not. 
There is no guarantee INSN at OFF is the 21937 * original insn at old prog. 21938 */ 21939 data[off].zext_dst = insn_has_def32(insn + off + cnt - 1); 21940 21941 if (cnt == 1) 21942 return; 21943 prog_len = new_prog->len; 21944 21945 memmove(data + off + cnt - 1, data + off, 21946 sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1)); 21947 memset(data + off, 0, sizeof(struct bpf_insn_aux_data) * (cnt - 1)); 21948 for (i = off; i < off + cnt - 1; i++) { 21949 /* Expand insni[off]'s seen count to the patched range. */ 21950 data[i].seen = old_seen; 21951 data[i].zext_dst = insn_has_def32(insn + i); 21952 } 21953 } 21954 21955 static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len) 21956 { 21957 int i; 21958 21959 if (len == 1) 21960 return; 21961 /* NOTE: fake 'exit' subprog should be updated as well. */ 21962 for (i = 0; i <= env->subprog_cnt; i++) { 21963 if (env->subprog_info[i].start <= off) 21964 continue; 21965 env->subprog_info[i].start += len - 1; 21966 } 21967 } 21968 21969 static void release_insn_arrays(struct bpf_verifier_env *env) 21970 { 21971 int i; 21972 21973 for (i = 0; i < env->insn_array_map_cnt; i++) 21974 bpf_insn_array_release(env->insn_array_maps[i]); 21975 } 21976 21977 static void adjust_insn_arrays(struct bpf_verifier_env *env, u32 off, u32 len) 21978 { 21979 int i; 21980 21981 if (len == 1) 21982 return; 21983 21984 for (i = 0; i < env->insn_array_map_cnt; i++) 21985 bpf_insn_array_adjust(env->insn_array_maps[i], off, len); 21986 } 21987 21988 static void adjust_insn_arrays_after_remove(struct bpf_verifier_env *env, u32 off, u32 len) 21989 { 21990 int i; 21991 21992 for (i = 0; i < env->insn_array_map_cnt; i++) 21993 bpf_insn_array_adjust_after_remove(env->insn_array_maps[i], off, len); 21994 } 21995 21996 static void adjust_poke_descs(struct bpf_prog *prog, u32 off, u32 len) 21997 { 21998 struct bpf_jit_poke_descriptor *tab = prog->aux->poke_tab; 21999 int i, sz = prog->aux->size_poke_tab; 22000 struct 
bpf_jit_poke_descriptor *desc; 22001 22002 for (i = 0; i < sz; i++) { 22003 desc = &tab[i]; 22004 if (desc->insn_idx <= off) 22005 continue; 22006 desc->insn_idx += len - 1; 22007 } 22008 } 22009 22010 static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off, 22011 const struct bpf_insn *patch, u32 len) 22012 { 22013 struct bpf_prog *new_prog; 22014 struct bpf_insn_aux_data *new_data = NULL; 22015 22016 if (len > 1) { 22017 new_data = vrealloc(env->insn_aux_data, 22018 array_size(env->prog->len + len - 1, 22019 sizeof(struct bpf_insn_aux_data)), 22020 GFP_KERNEL_ACCOUNT | __GFP_ZERO); 22021 if (!new_data) 22022 return NULL; 22023 22024 env->insn_aux_data = new_data; 22025 } 22026 22027 new_prog = bpf_patch_insn_single(env->prog, off, patch, len); 22028 if (IS_ERR(new_prog)) { 22029 if (PTR_ERR(new_prog) == -ERANGE) 22030 verbose(env, 22031 "insn %d cannot be patched due to 16-bit range\n", 22032 env->insn_aux_data[off].orig_idx); 22033 return NULL; 22034 } 22035 adjust_insn_aux_data(env, new_prog, off, len); 22036 adjust_subprog_starts(env, off, len); 22037 adjust_insn_arrays(env, off, len); 22038 adjust_poke_descs(new_prog, off, len); 22039 return new_prog; 22040 } 22041 22042 /* 22043 * For all jmp insns in a given 'prog' that point to 'tgt_idx' insn adjust the 22044 * jump offset by 'delta'. 
22045 */ 22046 static int adjust_jmp_off(struct bpf_prog *prog, u32 tgt_idx, u32 delta) 22047 { 22048 struct bpf_insn *insn = prog->insnsi; 22049 u32 insn_cnt = prog->len, i; 22050 s32 imm; 22051 s16 off; 22052 22053 for (i = 0; i < insn_cnt; i++, insn++) { 22054 u8 code = insn->code; 22055 22056 if (tgt_idx <= i && i < tgt_idx + delta) 22057 continue; 22058 22059 if ((BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32) || 22060 BPF_OP(code) == BPF_CALL || BPF_OP(code) == BPF_EXIT) 22061 continue; 22062 22063 if (insn->code == (BPF_JMP32 | BPF_JA)) { 22064 if (i + 1 + insn->imm != tgt_idx) 22065 continue; 22066 if (check_add_overflow(insn->imm, delta, &imm)) 22067 return -ERANGE; 22068 insn->imm = imm; 22069 } else { 22070 if (i + 1 + insn->off != tgt_idx) 22071 continue; 22072 if (check_add_overflow(insn->off, delta, &off)) 22073 return -ERANGE; 22074 insn->off = off; 22075 } 22076 } 22077 return 0; 22078 } 22079 22080 static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env, 22081 u32 off, u32 cnt) 22082 { 22083 int i, j; 22084 22085 /* find first prog starting at or after off (first to remove) */ 22086 for (i = 0; i < env->subprog_cnt; i++) 22087 if (env->subprog_info[i].start >= off) 22088 break; 22089 /* find first prog starting at or after off + cnt (first to stay) */ 22090 for (j = i; j < env->subprog_cnt; j++) 22091 if (env->subprog_info[j].start >= off + cnt) 22092 break; 22093 /* if j doesn't start exactly at off + cnt, we are just removing 22094 * the front of previous prog 22095 */ 22096 if (env->subprog_info[j].start != off + cnt) 22097 j--; 22098 22099 if (j > i) { 22100 struct bpf_prog_aux *aux = env->prog->aux; 22101 int move; 22102 22103 /* move fake 'exit' subprog as well */ 22104 move = env->subprog_cnt + 1 - j; 22105 22106 memmove(env->subprog_info + i, 22107 env->subprog_info + j, 22108 sizeof(*env->subprog_info) * move); 22109 env->subprog_cnt -= j - i; 22110 22111 /* remove func_info */ 22112 if (aux->func_info) { 
22113 move = aux->func_info_cnt - j; 22114 22115 memmove(aux->func_info + i, 22116 aux->func_info + j, 22117 sizeof(*aux->func_info) * move); 22118 aux->func_info_cnt -= j - i; 22119 /* func_info->insn_off is set after all code rewrites, 22120 * in adjust_btf_func() - no need to adjust 22121 */ 22122 } 22123 } else { 22124 /* convert i from "first prog to remove" to "first to adjust" */ 22125 if (env->subprog_info[i].start == off) 22126 i++; 22127 } 22128 22129 /* update fake 'exit' subprog as well */ 22130 for (; i <= env->subprog_cnt; i++) 22131 env->subprog_info[i].start -= cnt; 22132 22133 return 0; 22134 } 22135 22136 static int bpf_adj_linfo_after_remove(struct bpf_verifier_env *env, u32 off, 22137 u32 cnt) 22138 { 22139 struct bpf_prog *prog = env->prog; 22140 u32 i, l_off, l_cnt, nr_linfo; 22141 struct bpf_line_info *linfo; 22142 22143 nr_linfo = prog->aux->nr_linfo; 22144 if (!nr_linfo) 22145 return 0; 22146 22147 linfo = prog->aux->linfo; 22148 22149 /* find first line info to remove, count lines to be removed */ 22150 for (i = 0; i < nr_linfo; i++) 22151 if (linfo[i].insn_off >= off) 22152 break; 22153 22154 l_off = i; 22155 l_cnt = 0; 22156 for (; i < nr_linfo; i++) 22157 if (linfo[i].insn_off < off + cnt) 22158 l_cnt++; 22159 else 22160 break; 22161 22162 /* First live insn doesn't match first live linfo, it needs to "inherit" 22163 * last removed linfo. prog is already modified, so prog->len == off 22164 * means no live instructions after (tail of the program was removed). 
22165 */ 22166 if (prog->len != off && l_cnt && 22167 (i == nr_linfo || linfo[i].insn_off != off + cnt)) { 22168 l_cnt--; 22169 linfo[--i].insn_off = off + cnt; 22170 } 22171 22172 /* remove the line info which refer to the removed instructions */ 22173 if (l_cnt) { 22174 memmove(linfo + l_off, linfo + i, 22175 sizeof(*linfo) * (nr_linfo - i)); 22176 22177 prog->aux->nr_linfo -= l_cnt; 22178 nr_linfo = prog->aux->nr_linfo; 22179 } 22180 22181 /* pull all linfo[i].insn_off >= off + cnt in by cnt */ 22182 for (i = l_off; i < nr_linfo; i++) 22183 linfo[i].insn_off -= cnt; 22184 22185 /* fix up all subprogs (incl. 'exit') which start >= off */ 22186 for (i = 0; i <= env->subprog_cnt; i++) 22187 if (env->subprog_info[i].linfo_idx > l_off) { 22188 /* program may have started in the removed region but 22189 * may not be fully removed 22190 */ 22191 if (env->subprog_info[i].linfo_idx >= l_off + l_cnt) 22192 env->subprog_info[i].linfo_idx -= l_cnt; 22193 else 22194 env->subprog_info[i].linfo_idx = l_off; 22195 } 22196 22197 return 0; 22198 } 22199 22200 /* 22201 * Clean up dynamically allocated fields of aux data for instructions [start, ...] 
22202 */ 22203 static void clear_insn_aux_data(struct bpf_verifier_env *env, int start, int len) 22204 { 22205 struct bpf_insn_aux_data *aux_data = env->insn_aux_data; 22206 struct bpf_insn *insns = env->prog->insnsi; 22207 int end = start + len; 22208 int i; 22209 22210 for (i = start; i < end; i++) { 22211 if (aux_data[i].jt) { 22212 kvfree(aux_data[i].jt); 22213 aux_data[i].jt = NULL; 22214 } 22215 22216 if (bpf_is_ldimm64(&insns[i])) 22217 i++; 22218 } 22219 } 22220 22221 static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt) 22222 { 22223 struct bpf_insn_aux_data *aux_data = env->insn_aux_data; 22224 unsigned int orig_prog_len = env->prog->len; 22225 int err; 22226 22227 if (bpf_prog_is_offloaded(env->prog->aux)) 22228 bpf_prog_offload_remove_insns(env, off, cnt); 22229 22230 /* Should be called before bpf_remove_insns, as it uses prog->insnsi */ 22231 clear_insn_aux_data(env, off, cnt); 22232 22233 err = bpf_remove_insns(env->prog, off, cnt); 22234 if (err) 22235 return err; 22236 22237 err = adjust_subprog_starts_after_remove(env, off, cnt); 22238 if (err) 22239 return err; 22240 22241 err = bpf_adj_linfo_after_remove(env, off, cnt); 22242 if (err) 22243 return err; 22244 22245 adjust_insn_arrays_after_remove(env, off, cnt); 22246 22247 memmove(aux_data + off, aux_data + off + cnt, 22248 sizeof(*aux_data) * (orig_prog_len - off - cnt)); 22249 22250 return 0; 22251 } 22252 22253 /* The verifier does more data flow analysis than llvm and will not 22254 * explore branches that are dead at run time. Malicious programs can 22255 * have dead code too. Therefore replace all dead at-run-time code 22256 * with 'ja -1'. 22257 * 22258 * Just nops are not optimal, e.g. if they would sit at the end of the 22259 * program and through another bug we would manage to jump there, then 22260 * we'd execute beyond program memory otherwise. 
Returning exception 22261 * code also wouldn't work since we can have subprogs where the dead 22262 * code could be located. 22263 */ 22264 static void sanitize_dead_code(struct bpf_verifier_env *env) 22265 { 22266 struct bpf_insn_aux_data *aux_data = env->insn_aux_data; 22267 struct bpf_insn trap = BPF_JMP_IMM(BPF_JA, 0, 0, -1); 22268 struct bpf_insn *insn = env->prog->insnsi; 22269 const int insn_cnt = env->prog->len; 22270 int i; 22271 22272 for (i = 0; i < insn_cnt; i++) { 22273 if (aux_data[i].seen) 22274 continue; 22275 memcpy(insn + i, &trap, sizeof(trap)); 22276 aux_data[i].zext_dst = false; 22277 } 22278 } 22279 22280 static bool insn_is_cond_jump(u8 code) 22281 { 22282 u8 op; 22283 22284 op = BPF_OP(code); 22285 if (BPF_CLASS(code) == BPF_JMP32) 22286 return op != BPF_JA; 22287 22288 if (BPF_CLASS(code) != BPF_JMP) 22289 return false; 22290 22291 return op != BPF_JA && op != BPF_EXIT && op != BPF_CALL; 22292 } 22293 22294 static void opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env) 22295 { 22296 struct bpf_insn_aux_data *aux_data = env->insn_aux_data; 22297 struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0); 22298 struct bpf_insn *insn = env->prog->insnsi; 22299 const int insn_cnt = env->prog->len; 22300 int i; 22301 22302 for (i = 0; i < insn_cnt; i++, insn++) { 22303 if (!insn_is_cond_jump(insn->code)) 22304 continue; 22305 22306 if (!aux_data[i + 1].seen) 22307 ja.off = insn->off; 22308 else if (!aux_data[i + 1 + insn->off].seen) 22309 ja.off = 0; 22310 else 22311 continue; 22312 22313 if (bpf_prog_is_offloaded(env->prog->aux)) 22314 bpf_prog_offload_replace_insn(env, i, &ja); 22315 22316 memcpy(insn, &ja, sizeof(ja)); 22317 } 22318 } 22319 22320 static int opt_remove_dead_code(struct bpf_verifier_env *env) 22321 { 22322 struct bpf_insn_aux_data *aux_data = env->insn_aux_data; 22323 int insn_cnt = env->prog->len; 22324 int i, err; 22325 22326 for (i = 0; i < insn_cnt; i++) { 22327 int j; 22328 22329 j = 0; 22330 while (i + j < insn_cnt && 
!aux_data[i + j].seen) 22331 j++; 22332 if (!j) 22333 continue; 22334 22335 err = verifier_remove_insns(env, i, j); 22336 if (err) 22337 return err; 22338 insn_cnt = env->prog->len; 22339 } 22340 22341 return 0; 22342 } 22343 22344 static const struct bpf_insn NOP = BPF_JMP_IMM(BPF_JA, 0, 0, 0); 22345 static const struct bpf_insn MAY_GOTO_0 = BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, 0, 0); 22346 22347 static int opt_remove_nops(struct bpf_verifier_env *env) 22348 { 22349 struct bpf_insn *insn = env->prog->insnsi; 22350 int insn_cnt = env->prog->len; 22351 bool is_may_goto_0, is_ja; 22352 int i, err; 22353 22354 for (i = 0; i < insn_cnt; i++) { 22355 is_may_goto_0 = !memcmp(&insn[i], &MAY_GOTO_0, sizeof(MAY_GOTO_0)); 22356 is_ja = !memcmp(&insn[i], &NOP, sizeof(NOP)); 22357 22358 if (!is_may_goto_0 && !is_ja) 22359 continue; 22360 22361 err = verifier_remove_insns(env, i, 1); 22362 if (err) 22363 return err; 22364 insn_cnt--; 22365 /* Go back one insn to catch may_goto +1; may_goto +0 sequence */ 22366 i -= (is_may_goto_0 && i > 0) ? 
2 : 1; 22367 } 22368 22369 return 0; 22370 } 22371 22372 static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env, 22373 const union bpf_attr *attr) 22374 { 22375 struct bpf_insn *patch; 22376 /* use env->insn_buf as two independent buffers */ 22377 struct bpf_insn *zext_patch = env->insn_buf; 22378 struct bpf_insn *rnd_hi32_patch = &env->insn_buf[2]; 22379 struct bpf_insn_aux_data *aux = env->insn_aux_data; 22380 int i, patch_len, delta = 0, len = env->prog->len; 22381 struct bpf_insn *insns = env->prog->insnsi; 22382 struct bpf_prog *new_prog; 22383 bool rnd_hi32; 22384 22385 rnd_hi32 = attr->prog_flags & BPF_F_TEST_RND_HI32; 22386 zext_patch[1] = BPF_ZEXT_REG(0); 22387 rnd_hi32_patch[1] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, 0); 22388 rnd_hi32_patch[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, 32); 22389 rnd_hi32_patch[3] = BPF_ALU64_REG(BPF_OR, 0, BPF_REG_AX); 22390 for (i = 0; i < len; i++) { 22391 int adj_idx = i + delta; 22392 struct bpf_insn insn; 22393 int load_reg; 22394 22395 insn = insns[adj_idx]; 22396 load_reg = insn_def_regno(&insn); 22397 if (!aux[adj_idx].zext_dst) { 22398 u8 code, class; 22399 u32 imm_rnd; 22400 22401 if (!rnd_hi32) 22402 continue; 22403 22404 code = insn.code; 22405 class = BPF_CLASS(code); 22406 if (load_reg == -1) 22407 continue; 22408 22409 /* NOTE: arg "reg" (the fourth one) is only used for 22410 * BPF_STX + SRC_OP, so it is safe to pass NULL 22411 * here. 22412 */ 22413 if (is_reg64(&insn, load_reg, NULL, DST_OP)) { 22414 if (class == BPF_LD && 22415 BPF_MODE(code) == BPF_IMM) 22416 i++; 22417 continue; 22418 } 22419 22420 /* ctx load could be transformed into wider load. 
*/ 22421 if (class == BPF_LDX && 22422 aux[adj_idx].ptr_type == PTR_TO_CTX) 22423 continue; 22424 22425 imm_rnd = get_random_u32(); 22426 rnd_hi32_patch[0] = insn; 22427 rnd_hi32_patch[1].imm = imm_rnd; 22428 rnd_hi32_patch[3].dst_reg = load_reg; 22429 patch = rnd_hi32_patch; 22430 patch_len = 4; 22431 goto apply_patch_buffer; 22432 } 22433 22434 /* Add in an zero-extend instruction if a) the JIT has requested 22435 * it or b) it's a CMPXCHG. 22436 * 22437 * The latter is because: BPF_CMPXCHG always loads a value into 22438 * R0, therefore always zero-extends. However some archs' 22439 * equivalent instruction only does this load when the 22440 * comparison is successful. This detail of CMPXCHG is 22441 * orthogonal to the general zero-extension behaviour of the 22442 * CPU, so it's treated independently of bpf_jit_needs_zext. 22443 */ 22444 if (!bpf_jit_needs_zext() && !is_cmpxchg_insn(&insn)) 22445 continue; 22446 22447 /* Zero-extension is done by the caller. */ 22448 if (bpf_pseudo_kfunc_call(&insn)) 22449 continue; 22450 22451 if (verifier_bug_if(load_reg == -1, env, 22452 "zext_dst is set, but no reg is defined")) 22453 return -EFAULT; 22454 22455 zext_patch[0] = insn; 22456 zext_patch[1].dst_reg = load_reg; 22457 zext_patch[1].src_reg = load_reg; 22458 patch = zext_patch; 22459 patch_len = 2; 22460 apply_patch_buffer: 22461 new_prog = bpf_patch_insn_data(env, adj_idx, patch, patch_len); 22462 if (!new_prog) 22463 return -ENOMEM; 22464 env->prog = new_prog; 22465 insns = new_prog->insnsi; 22466 aux = env->insn_aux_data; 22467 delta += patch_len - 1; 22468 } 22469 22470 return 0; 22471 } 22472 22473 /* convert load instructions that access fields of a context type into a 22474 * sequence of instructions that access fields of the underlying structure: 22475 * struct __sk_buff -> struct sk_buff 22476 * struct bpf_sock_ops -> struct sock 22477 */ 22478 static int convert_ctx_accesses(struct bpf_verifier_env *env) 22479 { 22480 struct bpf_subprog_info *subprogs = 
env->subprog_info;
	const struct bpf_verifier_ops *ops = env->ops;
	/* delta counts the insns inserted so far, so original insn i lives at
	 * index i + delta of the patched program.
	 */
	int i, cnt, size, ctx_field_size, ret, delta = 0, epilogue_cnt = 0;
	const int insn_cnt = env->prog->len;
	struct bpf_insn *epilogue_buf = env->epilogue_buf;
	struct bpf_insn *insn_buf = env->insn_buf;
	struct bpf_insn *insn;
	u32 target_size, size_default, off;
	struct bpf_prog *new_prog;
	enum bpf_access_type type;
	bool is_narrower_load;
	/* Index of the first emitted epilogue; 0 means "none emitted yet" */
	int epilogue_idx = 0;

	/* Generate the optional epilogue once into epilogue_buf. It is
	 * spliced in at the BPF_EXIT insns of the main prog further below.
	 * The offset handed to gen_epilogue() is the stack slot that the
	 * store emitted in the 'else if' branch fills with R1.
	 */
	if (ops->gen_epilogue) {
		epilogue_cnt = ops->gen_epilogue(epilogue_buf, env->prog,
						 -(subprogs[0].stack_depth + 8));
		if (epilogue_cnt >= INSN_BUF_SIZE) {
			verifier_bug(env, "epilogue is too long");
			return -EFAULT;
		} else if (epilogue_cnt) {
			/* Save the ARG_PTR_TO_CTX for the epilogue to use */
			cnt = 0;
			subprogs[0].stack_depth += 8;
			insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_FP, BPF_REG_1,
						      -subprogs[0].stack_depth);
			insn_buf[cnt++] = env->prog->insnsi[0];
			new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
			if (!new_prog)
				return -ENOMEM;
			env->prog = new_prog;
			delta += cnt - 1;

			ret = add_kfunc_in_insns(env, epilogue_buf, epilogue_cnt - 1);
			if (ret < 0)
				return ret;
		}
	}

	/* A prologue is required whenever a direct write was seen, so a
	 * missing gen_prologue callback in that case is a verifier bug.
	 */
	if (ops->gen_prologue || env->seen_direct_write) {
		if (!ops->gen_prologue) {
			verifier_bug(env, "gen_prologue is null");
			return -EFAULT;
		}
		cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
					env->prog);
		if (cnt >= INSN_BUF_SIZE) {
			verifier_bug(env, "prologue is too long");
			return -EFAULT;
		} else if (cnt) {
			new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
			if (!new_prog)
				return -ENOMEM;

			env->prog = new_prog;
			delta += cnt - 1;

			ret = add_kfunc_in_insns(env, insn_buf, cnt - 1);
			if (ret < 0)
				return ret;
		}
	}

	/* Insns were inserted at index 0: fix up jump offsets accordingly */
	if (delta)
		WARN_ON(adjust_jmp_off(env->prog, 0, delta));

	/* Offloaded programs get no per-insn rewriting here */
	if (bpf_prog_is_offloaded(env->prog->aux))
		return 0;

	insn = env->prog->insnsi + delta;

	for (i = 0; i < insn_cnt; i++, insn++) {
		bpf_convert_ctx_access_t convert_ctx_access;
		u8 mode;

		if (env->insn_aux_data[i + delta].nospec) {
			WARN_ON_ONCE(env->insn_aux_data[i + delta].alu_state);
			struct bpf_insn *patch = insn_buf;

			/* Emit the barrier in front of the insn */
			*patch++ = BPF_ST_NOSPEC();
			*patch++ = *insn;
			cnt = patch - insn_buf;
			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
			if (!new_prog)
				return -ENOMEM;

			delta += cnt - 1;
			env->prog = new_prog;
			insn = new_prog->insnsi + i + delta;
			/* This can not be easily merged with the
			 * nospec_result-case, because an insn may require a
			 * nospec before and after itself. Therefore also do not
			 * 'continue' here but potentially apply further
			 * patching to insn. *insn should equal patch[1] now.
			 */
		}

		/* Classify the insn: plain/sign-extending load, plain store,
		 * atomic on arena memory, main-prog exit needing the epilogue,
		 * or something this pass does not touch.
		 */
		if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
		    insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
		    insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
		    insn->code == (BPF_LDX | BPF_MEM | BPF_DW) ||
		    insn->code == (BPF_LDX | BPF_MEMSX | BPF_B) ||
		    insn->code == (BPF_LDX | BPF_MEMSX | BPF_H) ||
		    insn->code == (BPF_LDX | BPF_MEMSX | BPF_W)) {
			type = BPF_READ;
		} else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) ||
			   insn->code == (BPF_STX | BPF_MEM | BPF_H) ||
			   insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
			   insn->code == (BPF_STX | BPF_MEM | BPF_DW) ||
			   insn->code == (BPF_ST | BPF_MEM | BPF_B) ||
			   insn->code == (BPF_ST | BPF_MEM | BPF_H) ||
			   insn->code == (BPF_ST | BPF_MEM | BPF_W) ||
			   insn->code == (BPF_ST | BPF_MEM | BPF_DW)) {
			type = BPF_WRITE;
		} else if ((insn->code == (BPF_STX | BPF_ATOMIC | BPF_B) ||
			    insn->code == (BPF_STX | BPF_ATOMIC | BPF_H) ||
			    insn->code == (BPF_STX | BPF_ATOMIC | BPF_W) ||
			    insn->code == (BPF_STX | BPF_ATOMIC | BPF_DW)) &&
			   env->insn_aux_data[i + delta].ptr_type == PTR_TO_ARENA) {
			/* Re-tag as a probe atomic and account for one more
			 * exception table entry.
			 */
			insn->code = BPF_STX | BPF_PROBE_ATOMIC | BPF_SIZE(insn->code);
			env->prog->aux->num_exentries++;
			continue;
		} else if (insn->code == (BPF_JMP | BPF_EXIT) &&
			   epilogue_cnt &&
			   i + delta < subprogs[1].start) {
			/* Generate epilogue for the main prog */
			if (epilogue_idx) {
				/* jump back to the earlier generated epilogue */
				insn_buf[0] = BPF_JMP32_A(epilogue_idx - i - delta - 1);
				cnt = 1;
			} else {
				memcpy(insn_buf, epilogue_buf,
				       epilogue_cnt * sizeof(*epilogue_buf));
				cnt = epilogue_cnt;
				/* epilogue_idx cannot be 0. It must have at
				 * least one ctx ptr saving insn before the
				 * epilogue.
				 */
				epilogue_idx = i + delta;
			}
			goto patch_insn_buf;
		} else {
			continue;
		}

		if (type == BPF_WRITE &&
		    env->insn_aux_data[i + delta].nospec_result) {
			/* nospec_result is only used to mitigate Spectre v4 and
			 * to limit verification-time for Spectre v1.
			 */
			struct bpf_insn *patch = insn_buf;

			/* Here the barrier goes after the store */
			*patch++ = *insn;
			*patch++ = BPF_ST_NOSPEC();
			cnt = patch - insn_buf;
			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
			if (!new_prog)
				return -ENOMEM;

			delta += cnt - 1;
			env->prog = new_prog;
			insn = new_prog->insnsi + i + delta;
			continue;
		}

		/* Select the converter from the pointer type recorded for this
		 * access. Types without a converter are either re-tagged in
		 * place (probe loads, arena accesses) or left untouched.
		 */
		switch ((int)env->insn_aux_data[i + delta].ptr_type) {
		case PTR_TO_CTX:
			if (!ops->convert_ctx_access)
				continue;
			convert_ctx_access = ops->convert_ctx_access;
			break;
		case PTR_TO_SOCKET:
		case PTR_TO_SOCK_COMMON:
			convert_ctx_access = bpf_sock_convert_ctx_access;
			break;
		case PTR_TO_TCP_SOCK:
			convert_ctx_access = bpf_tcp_sock_convert_ctx_access;
			break;
		case PTR_TO_XDP_SOCK:
			convert_ctx_access = bpf_xdp_sock_convert_ctx_access;
			break;
		case PTR_TO_BTF_ID:
		case PTR_TO_BTF_ID | PTR_UNTRUSTED:
		/* PTR_TO_BTF_ID | MEM_ALLOC always has a valid lifetime, unlike
		 * PTR_TO_BTF_ID, and an active ref_obj_id, but the same cannot
		 * be said once it is marked PTR_UNTRUSTED, hence we must handle
		 * any faults for loads into such types. BPF_WRITE is disallowed
		 * for this case.
		 */
		case PTR_TO_BTF_ID | MEM_ALLOC | PTR_UNTRUSTED:
		case PTR_TO_MEM | MEM_RDONLY | PTR_UNTRUSTED:
			if (type == BPF_READ) {
				/* Keep the access size, switch to the probe
				 * variant of the load mode.
				 */
				if (BPF_MODE(insn->code) == BPF_MEM)
					insn->code = BPF_LDX | BPF_PROBE_MEM |
						     BPF_SIZE((insn)->code);
				else
					insn->code = BPF_LDX | BPF_PROBE_MEMSX |
						     BPF_SIZE((insn)->code);
				env->prog->aux->num_exentries++;
			}
			continue;
		case PTR_TO_ARENA:
			if (BPF_MODE(insn->code) == BPF_MEMSX) {
				if (!bpf_jit_supports_insn(insn, true)) {
					verbose(env, "sign extending loads from arena are not supported yet\n");
					return -EOPNOTSUPP;
				}
				insn->code = BPF_CLASS(insn->code) | BPF_PROBE_MEM32SX | BPF_SIZE(insn->code);
			} else {
				insn->code = BPF_CLASS(insn->code) | BPF_PROBE_MEM32 | BPF_SIZE(insn->code);
			}
			env->prog->aux->num_exentries++;
			continue;
		default:
			continue;
		}

		/* Capture size and mode now: insn->code may be rewritten below */
		ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size;
		size = BPF_LDST_BYTES(insn);
		mode = BPF_MODE(insn->code);

		/* If the read access is a narrower load of the field,
		 * convert to a 4/8-byte load, to minimize program type specific
		 * convert_ctx_access changes. If conversion is successful,
		 * we will apply proper mask to the result.
		 */
		is_narrower_load = size < ctx_field_size;
		size_default = bpf_ctx_off_adjust_machine(ctx_field_size);
		off = insn->off;
		if (is_narrower_load) {
			u8 size_code;

			if (type == BPF_WRITE) {
				verifier_bug(env, "narrow ctx access misconfigured");
				return -EFAULT;
			}

			size_code = BPF_H;
			if (ctx_field_size == 4)
				size_code = BPF_W;
			else if (ctx_field_size == 8)
				size_code = BPF_DW;

			/* Align the offset down to a size_default boundary; the
			 * original 'off' is kept for the shift computed below.
			 */
			insn->off = off & ~(size_default - 1);
			insn->code = BPF_LDX | BPF_MEM | size_code;
		}

		target_size = 0;
		cnt = convert_ctx_access(type, insn, insn_buf, env->prog,
					 &target_size);
		if (cnt == 0 || cnt >= INSN_BUF_SIZE ||
		    (ctx_field_size && !target_size)) {
			verifier_bug(env, "error during ctx access conversion (%d)", cnt);
			return -EFAULT;
		}

		/* Extract the originally requested bytes from the wider load:
		 * shift them down if needed, then mask to 'size' bytes.
		 */
		if (is_narrower_load && size < target_size) {
			u8 shift = bpf_ctx_narrow_access_offset(
				off, size, size_default) * 8;
			if (shift && cnt + 1 >= INSN_BUF_SIZE) {
				verifier_bug(env, "narrow ctx load misconfigured");
				return -EFAULT;
			}
			if (ctx_field_size <= 4) {
				if (shift)
					insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH,
									insn->dst_reg,
									shift);
				insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
								(1 << size * 8) - 1);
			} else {
				if (shift)
					insn_buf[cnt++] = BPF_ALU64_IMM(BPF_RSH,
									insn->dst_reg,
									shift);
				insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
								(1ULL << size * 8) - 1);
			}
		}
		/* For an originally sign-extending load, append a mov of
		 * dst_reg onto itself carrying size * 8 in the off field.
		 */
		if (mode == BPF_MEMSX)
			insn_buf[cnt++] = BPF_RAW_INSN(BPF_ALU64 | BPF_MOV | BPF_X,
						       insn->dst_reg, insn->dst_reg,
						       size * 8, 0);

patch_insn_buf:
		new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
		if (!new_prog)
			return -ENOMEM;

		delta += cnt - 1;

		/* keep walking new program and skip insns we just inserted
*/
		env->prog = new_prog;
		insn = new_prog->insnsi + i + delta;
	}

	return 0;
}

/* JIT the main program and every subprogram as separate bpf_prog objects.
 *
 * Returns 0 on success, and also when there is at most one subprog (nothing
 * to do). Returns -EFAULT when a call target cannot be mapped to a subprog.
 * On any other failure the pseudo call insns of the main prog are restored
 * and jit_requested is cleared before the error is returned.
 */
static int jit_subprogs(struct bpf_verifier_env *env)
{
	struct bpf_prog *prog = env->prog, **func, *tmp;
	int i, j, subprog_start, subprog_end = 0, len, subprog;
	struct bpf_map *map_ptr;
	struct bpf_insn *insn;
	void *old_bpf_func;
	int err, num_exentries;
	int old_len, subprog_start_adjustment = 0;

	if (env->subprog_cnt <= 1)
		return 0;

	/* Pass 1: stash the target subprog id in every pseudo call/func insn */
	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
		if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn))
			continue;

		/* Upon error here we cannot fall back to interpreter but
		 * need a hard reject of the program. Thus -EFAULT is
		 * propagated in any case.
		 */
		subprog = find_subprog(env, i + insn->imm + 1);
		if (verifier_bug_if(subprog < 0, env, "No program to jit at insn %d",
				    i + insn->imm + 1))
			return -EFAULT;
		/* temporarily remember subprog id inside insn instead of
		 * aux_data, since next loop will split up all insns into funcs
		 */
		insn->off = subprog;
		/* remember original imm in case JIT fails and fallback
		 * to interpreter will be needed
		 */
		env->insn_aux_data[i].call_imm = insn->imm;
		/* point imm to __bpf_call_base+1 from JITs point of view */
		insn->imm = 1;
		if (bpf_pseudo_func(insn)) {
#if defined(MODULES_VADDR)
			u64 addr = MODULES_VADDR;
#else
			u64 addr = VMALLOC_START;
#endif
			/* jit (e.g. x86_64) may emit fewer instructions
			 * if it learns a u32 imm is the same as a u64 imm.
			 * Set close enough to possible prog address.
			 */
			insn[0].imm = (u32)addr;
			insn[1].imm = addr >> 32;
		}
	}

	err = bpf_prog_alloc_jited_linfo(prog);
	if (err)
		goto out_undo_insn;

	err = -ENOMEM;
	func = kzalloc_objs(prog, env->subprog_cnt);
	if (!func)
		goto out_undo_insn;

	/* Pass 2: carve out one bpf_prog per subprog and JIT it a first time */
	for (i = 0; i < env->subprog_cnt; i++) {
		subprog_start = subprog_end;
		subprog_end = env->subprog_info[i + 1].start;

		len = subprog_end - subprog_start;
		/* bpf_prog_run() doesn't call subprogs directly,
		 * hence main prog stats include the runtime of subprogs.
		 * subprogs don't have IDs and not reachable via prog_get_next_id
		 * func[i]->stats will never be accessed and stays NULL
		 */
		func[i] = bpf_prog_alloc_no_stats(bpf_prog_size(len), GFP_USER);
		if (!func[i])
			goto out_free;
		memcpy(func[i]->insnsi, &prog->insnsi[subprog_start],
		       len * sizeof(struct bpf_insn));
		func[i]->type = prog->type;
		func[i]->len = len;
		if (bpf_prog_calc_tag(func[i]))
			goto out_free;
		func[i]->is_func = 1;
		func[i]->sleepable = prog->sleepable;
		func[i]->aux->func_idx = i;
		/* Below members will be freed only at prog->aux */
		func[i]->aux->btf = prog->aux->btf;
		func[i]->aux->subprog_start = subprog_start + subprog_start_adjustment;
		func[i]->aux->func_info = prog->aux->func_info;
		func[i]->aux->func_info_cnt = prog->aux->func_info_cnt;
		func[i]->aux->poke_tab = prog->aux->poke_tab;
		func[i]->aux->size_poke_tab = prog->aux->size_poke_tab;
		func[i]->aux->main_prog_aux = prog->aux;

		/* Point poke descriptors located in this subprog at its aux */
		for (j = 0; j < prog->aux->size_poke_tab; j++) {
			struct bpf_jit_poke_descriptor *poke;

			poke = &prog->aux->poke_tab[j];
			if (poke->insn_idx < subprog_end &&
			    poke->insn_idx >= subprog_start)
				poke->aux = func[i]->aux;
		}

		func[i]->aux->name[0] = 'F';
		func[i]->aux->stack_depth = env->subprog_info[i].stack_depth;
		if (env->subprog_info[i].priv_stack_mode == PRIV_STACK_ADAPTIVE)
			func[i]->aux->jits_use_priv_stack = true;

		func[i]->jit_requested = 1;
		func[i]->blinding_requested = prog->blinding_requested;
		func[i]->aux->kfunc_tab = prog->aux->kfunc_tab;
		func[i]->aux->kfunc_btf_tab = prog->aux->kfunc_btf_tab;
		func[i]->aux->linfo = prog->aux->linfo;
		func[i]->aux->nr_linfo = prog->aux->nr_linfo;
		func[i]->aux->jited_linfo = prog->aux->jited_linfo;
		func[i]->aux->linfo_idx = env->subprog_info[i].linfo_idx;
		func[i]->aux->arena = prog->aux->arena;
		func[i]->aux->used_maps = env->used_maps;
		func[i]->aux->used_map_cnt = env->used_map_cnt;
		/* Count the probe-style accesses in this subprog; the total
		 * is stored in aux->num_exentries below.
		 */
		num_exentries = 0;
		insn = func[i]->insnsi;
		for (j = 0; j < func[i]->len; j++, insn++) {
			if (BPF_CLASS(insn->code) == BPF_LDX &&
			    (BPF_MODE(insn->code) == BPF_PROBE_MEM ||
			     BPF_MODE(insn->code) == BPF_PROBE_MEM32 ||
			     BPF_MODE(insn->code) == BPF_PROBE_MEM32SX ||
			     BPF_MODE(insn->code) == BPF_PROBE_MEMSX))
				num_exentries++;
			if ((BPF_CLASS(insn->code) == BPF_STX ||
			     BPF_CLASS(insn->code) == BPF_ST) &&
			     BPF_MODE(insn->code) == BPF_PROBE_MEM32)
				num_exentries++;
			if (BPF_CLASS(insn->code) == BPF_STX &&
			     BPF_MODE(insn->code) == BPF_PROBE_ATOMIC)
				num_exentries++;
		}
		func[i]->aux->num_exentries = num_exentries;
		func[i]->aux->tail_call_reachable = env->subprog_info[i].tail_call_reachable;
		func[i]->aux->exception_cb = env->subprog_info[i].is_exception_cb;
		func[i]->aux->changes_pkt_data = env->subprog_info[i].changes_pkt_data;
		func[i]->aux->might_sleep = env->subprog_info[i].might_sleep;
		if (!i)
			func[i]->aux->exception_boundary = env->seen_exception;

		/*
		 * To properly pass the absolute subprog start to jit
		 * all instruction adjustments should be accumulated
		 */
		old_len = func[i]->len;
		func[i] = bpf_int_jit_compile(func[i]);
		subprog_start_adjustment += func[i]->len - old_len;

		if (!func[i]->jited) {
			err = -ENOTSUPP;
			goto out_free;
		}
		cond_resched();
	}

	/* at this point all bpf functions were successfully JITed
	 * now populate all bpf_calls with correct addresses and
	 * run last pass of JIT
	 */
	for (i = 0; i < env->subprog_cnt; i++) {
		insn = func[i]->insnsi;
		for (j = 0; j < func[i]->len; j++, insn++) {
			if (bpf_pseudo_func(insn)) {
				/* insn->off still holds the subprog id */
				subprog = insn->off;
				insn[0].imm = (u32)(long)func[subprog]->bpf_func;
				insn[1].imm = ((u64)(long)func[subprog]->bpf_func) >> 32;
				continue;
			}
			if (!bpf_pseudo_call(insn))
				continue;
			subprog = insn->off;
			insn->imm = BPF_CALL_IMM(func[subprog]->bpf_func);
		}

		/* we use the aux data to keep a list of the start addresses
		 * of the JITed images for each function in the program
		 *
		 * for some architectures, such as powerpc64, the imm field
		 * might not be large enough to hold the offset of the start
		 * address of the callee's JITed image from __bpf_call_base
		 *
		 * in such cases, we can lookup the start address of a callee
		 * by using its subprog id, available from the off field of
		 * the call instruction, as an index for this list
		 */
		func[i]->aux->func = func;
		func[i]->aux->func_cnt = env->subprog_cnt - env->hidden_subprog_cnt;
		func[i]->aux->real_func_cnt = env->subprog_cnt;
	}
	/* Final JIT pass: neither the prog nor its image address may change */
	for (i = 0; i < env->subprog_cnt; i++) {
		old_bpf_func = func[i]->bpf_func;
		tmp = bpf_int_jit_compile(func[i]);
		if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) {
			verbose(env, "JIT doesn't support bpf-to-bpf calls\n");
			err = -ENOTSUPP;
			goto out_free;
		}
		cond_resched();
	}

	/*
	 * Cleanup func[i]->aux fields which aren't required
	 * or can become invalid in future
	 */
	for (i = 0; i < env->subprog_cnt; i++) {
		func[i]->aux->used_maps = NULL;
		func[i]->aux->used_map_cnt = 0;
	}

	/* finally lock prog and jit images for all functions and
	 * populate kallsyms. Begin at the first subprogram, since
	 * bpf_prog_load will add the kallsyms for the main program.
	 */
	for (i = 1; i < env->subprog_cnt; i++) {
		err = bpf_prog_lock_ro(func[i]);
		if (err)
			goto out_free;
	}

	for (i = 1; i < env->subprog_cnt; i++)
		bpf_prog_kallsyms_add(func[i]);

	/* Last step: make now unused interpreter insns from main
	 * prog consistent for later dump requests, so they can
	 * later look the same as if they were interpreted only.
	 */
	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
		if (bpf_pseudo_func(insn)) {
			insn[0].imm = env->insn_aux_data[i].call_imm;
			insn[1].imm = insn->off;
			insn->off = 0;
			continue;
		}
		if (!bpf_pseudo_call(insn))
			continue;
		insn->off = env->insn_aux_data[i].call_imm;
		subprog = find_subprog(env, i + insn->off + 1);
		insn->imm = subprog;
	}

	/* Publish the JITed image of func[0] as the main program's image */
	prog->jited = 1;
	prog->bpf_func = func[0]->bpf_func;
	prog->jited_len = func[0]->jited_len;
	prog->aux->extable = func[0]->aux->extable;
	prog->aux->num_exentries = func[0]->aux->num_exentries;
	prog->aux->func = func;
	prog->aux->func_cnt = env->subprog_cnt - env->hidden_subprog_cnt;
	prog->aux->real_func_cnt = env->subprog_cnt;
	prog->aux->bpf_exception_cb = (void *)func[env->exception_callback_subprog]->bpf_func;
	prog->aux->exception_boundary = func[0]->aux->exception_boundary;
	bpf_prog_jit_attempt_done(prog);
	return 0;
out_free:
	/* We failed JIT'ing, so at this point we need to unregister poke
	 * descriptors from subprogs, so that kernel is not attempting to
	 * patch it
anymore as we're freeing the subprog JIT memory. 23034 */ 23035 for (i = 0; i < prog->aux->size_poke_tab; i++) { 23036 map_ptr = prog->aux->poke_tab[i].tail_call.map; 23037 map_ptr->ops->map_poke_untrack(map_ptr, prog->aux); 23038 } 23039 /* At this point we're guaranteed that poke descriptors are not 23040 * live anymore. We can just unlink its descriptor table as it's 23041 * released with the main prog. 23042 */ 23043 for (i = 0; i < env->subprog_cnt; i++) { 23044 if (!func[i]) 23045 continue; 23046 func[i]->aux->poke_tab = NULL; 23047 bpf_jit_free(func[i]); 23048 } 23049 kfree(func); 23050 out_undo_insn: 23051 /* cleanup main prog to be interpreted */ 23052 prog->jit_requested = 0; 23053 prog->blinding_requested = 0; 23054 for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) { 23055 if (!bpf_pseudo_call(insn)) 23056 continue; 23057 insn->off = 0; 23058 insn->imm = env->insn_aux_data[i].call_imm; 23059 } 23060 bpf_prog_jit_attempt_done(prog); 23061 return err; 23062 } 23063 23064 static int fixup_call_args(struct bpf_verifier_env *env) 23065 { 23066 #ifndef CONFIG_BPF_JIT_ALWAYS_ON 23067 struct bpf_prog *prog = env->prog; 23068 struct bpf_insn *insn = prog->insnsi; 23069 bool has_kfunc_call = bpf_prog_has_kfunc_call(prog); 23070 int i, depth; 23071 #endif 23072 int err = 0; 23073 23074 if (env->prog->jit_requested && 23075 !bpf_prog_is_offloaded(env->prog->aux)) { 23076 err = jit_subprogs(env); 23077 if (err == 0) 23078 return 0; 23079 if (err == -EFAULT) 23080 return err; 23081 } 23082 #ifndef CONFIG_BPF_JIT_ALWAYS_ON 23083 if (has_kfunc_call) { 23084 verbose(env, "calling kernel functions are not allowed in non-JITed programs\n"); 23085 return -EINVAL; 23086 } 23087 if (env->subprog_cnt > 1 && env->prog->aux->tail_call_reachable) { 23088 /* When JIT fails the progs with bpf2bpf calls and tail_calls 23089 * have to be rejected, since interpreter doesn't support them yet. 
23090 */ 23091 verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n"); 23092 return -EINVAL; 23093 } 23094 for (i = 0; i < prog->len; i++, insn++) { 23095 if (bpf_pseudo_func(insn)) { 23096 /* When JIT fails the progs with callback calls 23097 * have to be rejected, since interpreter doesn't support them yet. 23098 */ 23099 verbose(env, "callbacks are not allowed in non-JITed programs\n"); 23100 return -EINVAL; 23101 } 23102 23103 if (!bpf_pseudo_call(insn)) 23104 continue; 23105 depth = get_callee_stack_depth(env, insn, i); 23106 if (depth < 0) 23107 return depth; 23108 bpf_patch_call_args(insn, depth); 23109 } 23110 err = 0; 23111 #endif 23112 return err; 23113 } 23114 23115 /* replace a generic kfunc with a specialized version if necessary */ 23116 static int specialize_kfunc(struct bpf_verifier_env *env, struct bpf_kfunc_desc *desc, int insn_idx) 23117 { 23118 struct bpf_prog *prog = env->prog; 23119 bool seen_direct_write; 23120 void *xdp_kfunc; 23121 bool is_rdonly; 23122 u32 func_id = desc->func_id; 23123 u16 offset = desc->offset; 23124 unsigned long addr = desc->addr; 23125 23126 if (offset) /* return if module BTF is used */ 23127 return 0; 23128 23129 if (bpf_dev_bound_kfunc_id(func_id)) { 23130 xdp_kfunc = bpf_dev_bound_resolve_kfunc(prog, func_id); 23131 if (xdp_kfunc) 23132 addr = (unsigned long)xdp_kfunc; 23133 /* fallback to default kfunc when not supported by netdev */ 23134 } else if (func_id == special_kfunc_list[KF_bpf_dynptr_from_skb]) { 23135 seen_direct_write = env->seen_direct_write; 23136 is_rdonly = !may_access_direct_pkt_data(env, NULL, BPF_WRITE); 23137 23138 if (is_rdonly) 23139 addr = (unsigned long)bpf_dynptr_from_skb_rdonly; 23140 23141 /* restore env->seen_direct_write to its original value, since 23142 * may_access_direct_pkt_data mutates it 23143 */ 23144 env->seen_direct_write = seen_direct_write; 23145 } else if (func_id == special_kfunc_list[KF_bpf_set_dentry_xattr]) { 23146 if 
(bpf_lsm_has_d_inode_locked(prog)) 23147 addr = (unsigned long)bpf_set_dentry_xattr_locked; 23148 } else if (func_id == special_kfunc_list[KF_bpf_remove_dentry_xattr]) { 23149 if (bpf_lsm_has_d_inode_locked(prog)) 23150 addr = (unsigned long)bpf_remove_dentry_xattr_locked; 23151 } else if (func_id == special_kfunc_list[KF_bpf_dynptr_from_file]) { 23152 if (!env->insn_aux_data[insn_idx].non_sleepable) 23153 addr = (unsigned long)bpf_dynptr_from_file_sleepable; 23154 } else if (func_id == special_kfunc_list[KF_bpf_arena_alloc_pages]) { 23155 if (env->insn_aux_data[insn_idx].non_sleepable) 23156 addr = (unsigned long)bpf_arena_alloc_pages_non_sleepable; 23157 } else if (func_id == special_kfunc_list[KF_bpf_arena_free_pages]) { 23158 if (env->insn_aux_data[insn_idx].non_sleepable) 23159 addr = (unsigned long)bpf_arena_free_pages_non_sleepable; 23160 } 23161 desc->addr = addr; 23162 return 0; 23163 } 23164 23165 static void __fixup_collection_insert_kfunc(struct bpf_insn_aux_data *insn_aux, 23166 u16 struct_meta_reg, 23167 u16 node_offset_reg, 23168 struct bpf_insn *insn, 23169 struct bpf_insn *insn_buf, 23170 int *cnt) 23171 { 23172 struct btf_struct_meta *kptr_struct_meta = insn_aux->kptr_struct_meta; 23173 struct bpf_insn addr[2] = { BPF_LD_IMM64(struct_meta_reg, (long)kptr_struct_meta) }; 23174 23175 insn_buf[0] = addr[0]; 23176 insn_buf[1] = addr[1]; 23177 insn_buf[2] = BPF_MOV64_IMM(node_offset_reg, insn_aux->insert_off); 23178 insn_buf[3] = *insn; 23179 *cnt = 4; 23180 } 23181 23182 static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, 23183 struct bpf_insn *insn_buf, int insn_idx, int *cnt) 23184 { 23185 struct bpf_kfunc_desc *desc; 23186 int err; 23187 23188 if (!insn->imm) { 23189 verbose(env, "invalid kernel function call not eliminated in verifier pass\n"); 23190 return -EINVAL; 23191 } 23192 23193 *cnt = 0; 23194 23195 /* insn->imm has the btf func_id. 
Replace it with an offset relative to
	 * __bpf_call_base, unless the JIT needs to call functions that are
	 * further than 32 bits away (bpf_jit_supports_far_kfunc_call()).
	 */
	desc = find_kfunc_desc(env->prog, insn->imm, insn->off);
	if (!desc) {
		verifier_bug(env, "kernel function descriptor not found for func_id %u",
			     insn->imm);
		return -EFAULT;
	}

	/* May swap desc->addr for a specialized implementation */
	err = specialize_kfunc(env, desc, insn_idx);
	if (err)
		return err;

	if (!bpf_jit_supports_far_kfunc_call())
		insn->imm = BPF_CALL_IMM(desc->addr);

	/* The branches below build a replacement sequence in insn_buf for the
	 * kfuncs that need one and report its length through *cnt. When no
	 * branch matches, *cnt stays 0.
	 */
	if (desc->func_id == special_kfunc_list[KF_bpf_obj_new_impl] ||
	    desc->func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
		struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
		struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) };
		u64 obj_new_size = env->insn_aux_data[insn_idx].obj_new_size;

		if (desc->func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl] && kptr_struct_meta) {
			verifier_bug(env, "NULL kptr_struct_meta expected at insn_idx %d",
				     insn_idx);
			return -EFAULT;
		}

		/* R1 = object size, R2 = struct meta pointer, then the call */
		insn_buf[0] = BPF_MOV64_IMM(BPF_REG_1, obj_new_size);
		insn_buf[1] = addr[0];
		insn_buf[2] = addr[1];
		insn_buf[3] = *insn;
		*cnt = 4;
	} else if (desc->func_id == special_kfunc_list[KF_bpf_obj_drop_impl] ||
		   desc->func_id == special_kfunc_list[KF_bpf_percpu_obj_drop_impl] ||
		   desc->func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl]) {
		struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
		struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) };

		if (desc->func_id == special_kfunc_list[KF_bpf_percpu_obj_drop_impl] && kptr_struct_meta) {
			verifier_bug(env, "NULL kptr_struct_meta expected at insn_idx %d",
				     insn_idx);
			return -EFAULT;
		}

		if (desc->func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl] &&
		    !kptr_struct_meta) {
			verifier_bug(env, "kptr_struct_meta expected at insn_idx %d",
				     insn_idx);
			return -EFAULT;
		}

		/* R2 = struct meta pointer, then the call */
		insn_buf[0] = addr[0];
		insn_buf[1] = addr[1];
		insn_buf[2] = *insn;
		*cnt = 3;
	} else if (desc->func_id == special_kfunc_list[KF_bpf_list_push_back_impl] ||
		   desc->func_id == special_kfunc_list[KF_bpf_list_push_front_impl] ||
		   desc->func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
		struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
		int struct_meta_reg = BPF_REG_3;
		int node_offset_reg = BPF_REG_4;

		/* rbtree_add has extra 'less' arg, so args-to-fixup are in diff regs */
		if (desc->func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
			struct_meta_reg = BPF_REG_4;
			node_offset_reg = BPF_REG_5;
		}

		if (!kptr_struct_meta) {
			verifier_bug(env, "kptr_struct_meta expected at insn_idx %d",
				     insn_idx);
			return -EFAULT;
		}

		__fixup_collection_insert_kfunc(&env->insn_aux_data[insn_idx], struct_meta_reg,
						node_offset_reg, insn, insn_buf, cnt);
	} else if (desc->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx] ||
		   desc->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
		/* The call is replaced by a plain register move: R0 = R1 */
		insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1);
		*cnt = 1;
	} else if (desc->func_id == special_kfunc_list[KF_bpf_session_is_return] &&
		   env->prog->expected_attach_type == BPF_TRACE_FSESSION) {
		/*
		 * inline the bpf_session_is_return() for fsession:
		 *   bool bpf_session_is_return(void *ctx)
		 *   {
		 *       return (((u64 *)ctx)[-1] >> BPF_TRAMP_IS_RETURN_SHIFT) & 1;
		 *   }
		 */
		insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
		insn_buf[1] = BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, BPF_TRAMP_IS_RETURN_SHIFT);
		insn_buf[2] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1);
		*cnt = 3;
	} else if (desc->func_id == special_kfunc_list[KF_bpf_session_cookie] &&
		   env->prog->expected_attach_type == BPF_TRACE_FSESSION) {
		/*
		 * inline bpf_session_cookie() for fsession:
		 *   __u64 *bpf_session_cookie(void *ctx)
		 *   {
		 *       u64 off = (((u64 *)ctx)[-1] >> BPF_TRAMP_COOKIE_INDEX_SHIFT) & 0xFF;
		 *       return &((u64 *)ctx)[-off];
		 *   }
		 */
		insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
		insn_buf[1] = BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, BPF_TRAMP_COOKIE_INDEX_SHIFT);
		insn_buf[2] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xFF);
		/* scale the index to bytes (u64 slots) */
		insn_buf[3] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 3);
		/* R0 = -(R0 - R1), i.e. ctx minus the byte offset */
		insn_buf[4] = BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1);
		insn_buf[5] = BPF_ALU64_IMM(BPF_NEG, BPF_REG_0, 0);
		*cnt = 6;
	}

	/* When arg_prog is set it names the register that receives
	 * env->prog->aux: append a ld_imm64 of that pointer followed by the
	 * call, after whatever is already in insn_buf.
	 */
	if (env->insn_aux_data[insn_idx].arg_prog) {
		u32 regno = env->insn_aux_data[insn_idx].arg_prog;
		struct bpf_insn ld_addrs[2] = { BPF_LD_IMM64(regno, (long)env->prog->aux) };
		int idx = *cnt;

		insn_buf[idx++] = ld_addrs[0];
		insn_buf[idx++] = ld_addrs[1];
		insn_buf[idx++] = *insn;
		*cnt = idx;
	}
	return 0;
}

/* Append a hidden subprog made of patch[1..len-1] to the end of the program.
 *
 * The function requires that first instruction in 'patch' is insnsi[prog->len - 1].
 *
 * Returns 0 on success, -EFAULT if a hidden subprog already exists, or
 * -ENOMEM if patching fails.
 */
static int add_hidden_subprog(struct bpf_verifier_env *env, struct bpf_insn *patch, int len)
{
	struct bpf_subprog_info *info = env->subprog_info;
	int cnt = env->subprog_cnt;
	struct bpf_prog *prog;

	/* We only reserve one slot for hidden subprogs in subprog_info. */
	if (env->hidden_subprog_cnt) {
		verifier_bug(env, "only one hidden subprog supported");
		return -EFAULT;
	}
	/* We're not patching any existing instruction, just appending the new
	 * ones for the hidden subprog. Hence all of the adjustment operations
	 * in bpf_patch_insn_data are no-ops.
	 */
	prog = bpf_patch_insn_data(env, env->prog->len - 1, patch, len);
	if (!prog)
		return -ENOMEM;
	env->prog = prog;
	/* Move the entry at info[cnt] up one slot, then record the new
	 * subprog's start: the first of the len - 1 appended insns.
	 */
	info[cnt + 1].start = info[cnt].start;
	info[cnt].start = prog->len - len + 1;
	env->subprog_cnt++;
	env->hidden_subprog_cnt++;
	return 0;
}

/* Do various post-verification rewrites in a single program pass.
 * These rewrites simplify JIT and interpreter implementations.
 */
static int do_misc_fixups(struct bpf_verifier_env *env)
{
	struct bpf_prog *prog = env->prog;
	enum bpf_attach_type eatype = prog->expected_attach_type;
	enum bpf_prog_type prog_type = resolve_prog_type(prog);
	struct bpf_insn *insn = prog->insnsi;
	const struct bpf_func_proto *fn;
	const int insn_cnt = prog->len;
	const struct bpf_map_ops *ops;
	struct bpf_insn_aux_data *aux;
	struct bpf_insn *insn_buf = env->insn_buf;
	struct bpf_prog *new_prog;
	struct bpf_map *map_ptr;
	int i, ret, cnt, delta = 0, cur_subprog = 0;
	struct bpf_subprog_info *subprogs = env->subprog_info;
	u16 stack_depth = subprogs[cur_subprog].stack_depth;
	u16 stack_depth_extra = 0;

	if (env->seen_exception && !env->exception_callback_subprog) {
		struct bpf_insn *patch = insn_buf;

		*patch++ = env->prog->insnsi[insn_cnt - 1];
		*patch++ = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1);
		*patch++ = BPF_EXIT_INSN();
		ret = add_hidden_subprog(env, insn_buf, patch - insn_buf);
		if (ret < 0)
			return ret;
		prog = env->prog;
		insn = prog->insnsi;

		env->exception_callback_subprog = env->subprog_cnt - 1;
		/* Don't update insn_cnt, as add_hidden_subprog always appends insns */
		mark_subprog_exc_cb(env, env->exception_callback_subprog);
	}

	for (i = 0; i < insn_cnt;) {
		if (insn->code == (BPF_ALU64 | BPF_MOV | BPF_X) && insn->imm) {
			if ((insn->off ==
BPF_ADDR_SPACE_CAST && insn->imm == 1) || 23391 (((struct bpf_map *)env->prog->aux->arena)->map_flags & BPF_F_NO_USER_CONV)) { 23392 /* convert to 32-bit mov that clears upper 32-bit */ 23393 insn->code = BPF_ALU | BPF_MOV | BPF_X; 23394 /* clear off and imm, so it's a normal 'wX = wY' from JIT pov */ 23395 insn->off = 0; 23396 insn->imm = 0; 23397 } /* cast from as(0) to as(1) should be handled by JIT */ 23398 goto next_insn; 23399 } 23400 23401 if (env->insn_aux_data[i + delta].needs_zext) 23402 /* Convert BPF_CLASS(insn->code) == BPF_ALU64 to 32-bit ALU */ 23403 insn->code = BPF_ALU | BPF_OP(insn->code) | BPF_SRC(insn->code); 23404 23405 /* Make sdiv/smod divide-by-minus-one exceptions impossible. */ 23406 if ((insn->code == (BPF_ALU64 | BPF_MOD | BPF_K) || 23407 insn->code == (BPF_ALU64 | BPF_DIV | BPF_K) || 23408 insn->code == (BPF_ALU | BPF_MOD | BPF_K) || 23409 insn->code == (BPF_ALU | BPF_DIV | BPF_K)) && 23410 insn->off == 1 && insn->imm == -1) { 23411 bool is64 = BPF_CLASS(insn->code) == BPF_ALU64; 23412 bool isdiv = BPF_OP(insn->code) == BPF_DIV; 23413 struct bpf_insn *patch = insn_buf; 23414 23415 if (isdiv) 23416 *patch++ = BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) | 23417 BPF_NEG | BPF_K, insn->dst_reg, 23418 0, 0, 0); 23419 else 23420 *patch++ = BPF_MOV32_IMM(insn->dst_reg, 0); 23421 23422 cnt = patch - insn_buf; 23423 23424 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); 23425 if (!new_prog) 23426 return -ENOMEM; 23427 23428 delta += cnt - 1; 23429 env->prog = prog = new_prog; 23430 insn = new_prog->insnsi + i + delta; 23431 goto next_insn; 23432 } 23433 23434 /* Make divide-by-zero and divide-by-minus-one exceptions impossible. 
*/ 23435 if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) || 23436 insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) || 23437 insn->code == (BPF_ALU | BPF_MOD | BPF_X) || 23438 insn->code == (BPF_ALU | BPF_DIV | BPF_X)) { 23439 bool is64 = BPF_CLASS(insn->code) == BPF_ALU64; 23440 bool isdiv = BPF_OP(insn->code) == BPF_DIV; 23441 bool is_sdiv = isdiv && insn->off == 1; 23442 bool is_smod = !isdiv && insn->off == 1; 23443 struct bpf_insn *patch = insn_buf; 23444 23445 if (is_sdiv) { 23446 /* [R,W]x sdiv 0 -> 0 23447 * LLONG_MIN sdiv -1 -> LLONG_MIN 23448 * INT_MIN sdiv -1 -> INT_MIN 23449 */ 23450 *patch++ = BPF_MOV64_REG(BPF_REG_AX, insn->src_reg); 23451 *patch++ = BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) | 23452 BPF_ADD | BPF_K, BPF_REG_AX, 23453 0, 0, 1); 23454 *patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) | 23455 BPF_JGT | BPF_K, BPF_REG_AX, 23456 0, 4, 1); 23457 *patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) | 23458 BPF_JEQ | BPF_K, BPF_REG_AX, 23459 0, 1, 0); 23460 *patch++ = BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) | 23461 BPF_MOV | BPF_K, insn->dst_reg, 23462 0, 0, 0); 23463 /* BPF_NEG(LLONG_MIN) == -LLONG_MIN == LLONG_MIN */ 23464 *patch++ = BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) | 23465 BPF_NEG | BPF_K, insn->dst_reg, 23466 0, 0, 0); 23467 *patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1); 23468 *patch++ = *insn; 23469 cnt = patch - insn_buf; 23470 } else if (is_smod) { 23471 /* [R,W]x mod 0 -> [R,W]x */ 23472 /* [R,W]x mod -1 -> 0 */ 23473 *patch++ = BPF_MOV64_REG(BPF_REG_AX, insn->src_reg); 23474 *patch++ = BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) | 23475 BPF_ADD | BPF_K, BPF_REG_AX, 23476 0, 0, 1); 23477 *patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) | 23478 BPF_JGT | BPF_K, BPF_REG_AX, 23479 0, 3, 1); 23480 *patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) | 23481 BPF_JEQ | BPF_K, BPF_REG_AX, 23482 0, 3 + (is64 ? 
0 : 1), 1); 23483 *patch++ = BPF_MOV32_IMM(insn->dst_reg, 0); 23484 *patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1); 23485 *patch++ = *insn; 23486 23487 if (!is64) { 23488 *patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1); 23489 *patch++ = BPF_MOV32_REG(insn->dst_reg, insn->dst_reg); 23490 } 23491 cnt = patch - insn_buf; 23492 } else if (isdiv) { 23493 /* [R,W]x div 0 -> 0 */ 23494 *patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) | 23495 BPF_JNE | BPF_K, insn->src_reg, 23496 0, 2, 0); 23497 *patch++ = BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg); 23498 *patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1); 23499 *patch++ = *insn; 23500 cnt = patch - insn_buf; 23501 } else { 23502 /* [R,W]x mod 0 -> [R,W]x */ 23503 *patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) | 23504 BPF_JEQ | BPF_K, insn->src_reg, 23505 0, 1 + (is64 ? 0 : 1), 0); 23506 *patch++ = *insn; 23507 23508 if (!is64) { 23509 *patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1); 23510 *patch++ = BPF_MOV32_REG(insn->dst_reg, insn->dst_reg); 23511 } 23512 cnt = patch - insn_buf; 23513 } 23514 23515 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); 23516 if (!new_prog) 23517 return -ENOMEM; 23518 23519 delta += cnt - 1; 23520 env->prog = prog = new_prog; 23521 insn = new_prog->insnsi + i + delta; 23522 goto next_insn; 23523 } 23524 23525 /* Make it impossible to de-reference a userspace address */ 23526 if (BPF_CLASS(insn->code) == BPF_LDX && 23527 (BPF_MODE(insn->code) == BPF_PROBE_MEM || 23528 BPF_MODE(insn->code) == BPF_PROBE_MEMSX)) { 23529 struct bpf_insn *patch = insn_buf; 23530 u64 uaddress_limit = bpf_arch_uaddress_limit(); 23531 23532 if (!uaddress_limit) 23533 goto next_insn; 23534 23535 *patch++ = BPF_MOV64_REG(BPF_REG_AX, insn->src_reg); 23536 if (insn->off) 23537 *patch++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_AX, insn->off); 23538 *patch++ = BPF_ALU64_IMM(BPF_RSH, BPF_REG_AX, 32); 23539 *patch++ = BPF_JMP_IMM(BPF_JLE, BPF_REG_AX, uaddress_limit >> 32, 2); 23540 *patch++ = *insn; 23541 *patch++ = 
BPF_JMP_IMM(BPF_JA, 0, 0, 1); 23542 *patch++ = BPF_MOV64_IMM(insn->dst_reg, 0); 23543 23544 cnt = patch - insn_buf; 23545 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); 23546 if (!new_prog) 23547 return -ENOMEM; 23548 23549 delta += cnt - 1; 23550 env->prog = prog = new_prog; 23551 insn = new_prog->insnsi + i + delta; 23552 goto next_insn; 23553 } 23554 23555 /* Implement LD_ABS and LD_IND with a rewrite, if supported by the program type. */ 23556 if (BPF_CLASS(insn->code) == BPF_LD && 23557 (BPF_MODE(insn->code) == BPF_ABS || 23558 BPF_MODE(insn->code) == BPF_IND)) { 23559 cnt = env->ops->gen_ld_abs(insn, insn_buf); 23560 if (cnt == 0 || cnt >= INSN_BUF_SIZE) { 23561 verifier_bug(env, "%d insns generated for ld_abs", cnt); 23562 return -EFAULT; 23563 } 23564 23565 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); 23566 if (!new_prog) 23567 return -ENOMEM; 23568 23569 delta += cnt - 1; 23570 env->prog = prog = new_prog; 23571 insn = new_prog->insnsi + i + delta; 23572 goto next_insn; 23573 } 23574 23575 /* Rewrite pointer arithmetic to mitigate speculation attacks. */ 23576 if (insn->code == (BPF_ALU64 | BPF_ADD | BPF_X) || 23577 insn->code == (BPF_ALU64 | BPF_SUB | BPF_X)) { 23578 const u8 code_add = BPF_ALU64 | BPF_ADD | BPF_X; 23579 const u8 code_sub = BPF_ALU64 | BPF_SUB | BPF_X; 23580 struct bpf_insn *patch = insn_buf; 23581 bool issrc, isneg, isimm; 23582 u32 off_reg; 23583 23584 aux = &env->insn_aux_data[i + delta]; 23585 if (!aux->alu_state || 23586 aux->alu_state == BPF_ALU_NON_POINTER) 23587 goto next_insn; 23588 23589 isneg = aux->alu_state & BPF_ALU_NEG_VALUE; 23590 issrc = (aux->alu_state & BPF_ALU_SANITIZE) == 23591 BPF_ALU_SANITIZE_SRC; 23592 isimm = aux->alu_state & BPF_ALU_IMMEDIATE; 23593 23594 off_reg = issrc ? 
insn->src_reg : insn->dst_reg; 23595 if (isimm) { 23596 *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit); 23597 } else { 23598 if (isneg) 23599 *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1); 23600 *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit); 23601 *patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg); 23602 *patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg); 23603 *patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0); 23604 *patch++ = BPF_ALU64_IMM(BPF_ARSH, BPF_REG_AX, 63); 23605 *patch++ = BPF_ALU64_REG(BPF_AND, BPF_REG_AX, off_reg); 23606 } 23607 if (!issrc) 23608 *patch++ = BPF_MOV64_REG(insn->dst_reg, insn->src_reg); 23609 insn->src_reg = BPF_REG_AX; 23610 if (isneg) 23611 insn->code = insn->code == code_add ? 23612 code_sub : code_add; 23613 *patch++ = *insn; 23614 if (issrc && isneg && !isimm) 23615 *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1); 23616 cnt = patch - insn_buf; 23617 23618 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); 23619 if (!new_prog) 23620 return -ENOMEM; 23621 23622 delta += cnt - 1; 23623 env->prog = prog = new_prog; 23624 insn = new_prog->insnsi + i + delta; 23625 goto next_insn; 23626 } 23627 23628 if (is_may_goto_insn(insn) && bpf_jit_supports_timed_may_goto()) { 23629 int stack_off_cnt = -stack_depth - 16; 23630 23631 /* 23632 * Two 8 byte slots, depth-16 stores the count, and 23633 * depth-8 stores the start timestamp of the loop. 23634 * 23635 * The starting value of count is BPF_MAX_TIMED_LOOPS 23636 * (0xffff). Every iteration loads it and subs it by 1, 23637 * until the value becomes 0 in AX (thus, 1 in stack), 23638 * after which we call arch_bpf_timed_may_goto, which 23639 * either sets AX to 0xffff to keep looping, or to 0 23640 * upon timeout. AX is then stored into the stack. In 23641 * the next iteration, we either see 0 and break out, or 23642 * continue iterating until the next time value is 0 23643 * after subtraction, rinse and repeat. 
23644 */ 23645 stack_depth_extra = 16; 23646 insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_AX, BPF_REG_10, stack_off_cnt); 23647 if (insn->off >= 0) 23648 insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off + 5); 23649 else 23650 insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off - 1); 23651 insn_buf[2] = BPF_ALU64_IMM(BPF_SUB, BPF_REG_AX, 1); 23652 insn_buf[3] = BPF_JMP_IMM(BPF_JNE, BPF_REG_AX, 0, 2); 23653 /* 23654 * AX is used as an argument to pass in stack_off_cnt 23655 * (to add to r10/fp), and also as the return value of 23656 * the call to arch_bpf_timed_may_goto. 23657 */ 23658 insn_buf[4] = BPF_MOV64_IMM(BPF_REG_AX, stack_off_cnt); 23659 insn_buf[5] = BPF_EMIT_CALL(arch_bpf_timed_may_goto); 23660 insn_buf[6] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_AX, stack_off_cnt); 23661 cnt = 7; 23662 23663 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); 23664 if (!new_prog) 23665 return -ENOMEM; 23666 23667 delta += cnt - 1; 23668 env->prog = prog = new_prog; 23669 insn = new_prog->insnsi + i + delta; 23670 goto next_insn; 23671 } else if (is_may_goto_insn(insn)) { 23672 int stack_off = -stack_depth - 8; 23673 23674 stack_depth_extra = 8; 23675 insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_AX, BPF_REG_10, stack_off); 23676 if (insn->off >= 0) 23677 insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off + 2); 23678 else 23679 insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off - 1); 23680 insn_buf[2] = BPF_ALU64_IMM(BPF_SUB, BPF_REG_AX, 1); 23681 insn_buf[3] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_AX, stack_off); 23682 cnt = 4; 23683 23684 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); 23685 if (!new_prog) 23686 return -ENOMEM; 23687 23688 delta += cnt - 1; 23689 env->prog = prog = new_prog; 23690 insn = new_prog->insnsi + i + delta; 23691 goto next_insn; 23692 } 23693 23694 if (insn->code != (BPF_JMP | BPF_CALL)) 23695 goto next_insn; 23696 if (insn->src_reg == BPF_PSEUDO_CALL) 23697 goto next_insn; 23698 if 
(insn->src_reg == BPF_PSEUDO_KFUNC_CALL) { 23699 ret = fixup_kfunc_call(env, insn, insn_buf, i + delta, &cnt); 23700 if (ret) 23701 return ret; 23702 if (cnt == 0) 23703 goto next_insn; 23704 23705 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); 23706 if (!new_prog) 23707 return -ENOMEM; 23708 23709 delta += cnt - 1; 23710 env->prog = prog = new_prog; 23711 insn = new_prog->insnsi + i + delta; 23712 goto next_insn; 23713 } 23714 23715 /* Skip inlining the helper call if the JIT does it. */ 23716 if (bpf_jit_inlines_helper_call(insn->imm)) 23717 goto next_insn; 23718 23719 if (insn->imm == BPF_FUNC_get_route_realm) 23720 prog->dst_needed = 1; 23721 if (insn->imm == BPF_FUNC_get_prandom_u32) 23722 bpf_user_rnd_init_once(); 23723 if (insn->imm == BPF_FUNC_override_return) 23724 prog->kprobe_override = 1; 23725 if (insn->imm == BPF_FUNC_tail_call) { 23726 /* If we tail call into other programs, we 23727 * cannot make any assumptions since they can 23728 * be replaced dynamically during runtime in 23729 * the program array. 
23730 */ 23731 prog->cb_access = 1; 23732 if (!allow_tail_call_in_subprogs(env)) 23733 prog->aux->stack_depth = MAX_BPF_STACK; 23734 prog->aux->max_pkt_offset = MAX_PACKET_OFF; 23735 23736 /* mark bpf_tail_call as different opcode to avoid 23737 * conditional branch in the interpreter for every normal 23738 * call and to prevent accidental JITing by JIT compiler 23739 * that doesn't support bpf_tail_call yet 23740 */ 23741 insn->imm = 0; 23742 insn->code = BPF_JMP | BPF_TAIL_CALL; 23743 23744 aux = &env->insn_aux_data[i + delta]; 23745 if (env->bpf_capable && !prog->blinding_requested && 23746 prog->jit_requested && 23747 !bpf_map_key_poisoned(aux) && 23748 !bpf_map_ptr_poisoned(aux) && 23749 !bpf_map_ptr_unpriv(aux)) { 23750 struct bpf_jit_poke_descriptor desc = { 23751 .reason = BPF_POKE_REASON_TAIL_CALL, 23752 .tail_call.map = aux->map_ptr_state.map_ptr, 23753 .tail_call.key = bpf_map_key_immediate(aux), 23754 .insn_idx = i + delta, 23755 }; 23756 23757 ret = bpf_jit_add_poke_descriptor(prog, &desc); 23758 if (ret < 0) { 23759 verbose(env, "adding tail call poke descriptor failed\n"); 23760 return ret; 23761 } 23762 23763 insn->imm = ret + 1; 23764 goto next_insn; 23765 } 23766 23767 if (!bpf_map_ptr_unpriv(aux)) 23768 goto next_insn; 23769 23770 /* instead of changing every JIT dealing with tail_call 23771 * emit two extra insns: 23772 * if (index >= max_entries) goto out; 23773 * index &= array->index_mask; 23774 * to avoid out-of-bounds cpu speculation 23775 */ 23776 if (bpf_map_ptr_poisoned(aux)) { 23777 verbose(env, "tail_call abusing map_ptr\n"); 23778 return -EINVAL; 23779 } 23780 23781 map_ptr = aux->map_ptr_state.map_ptr; 23782 insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3, 23783 map_ptr->max_entries, 2); 23784 insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3, 23785 container_of(map_ptr, 23786 struct bpf_array, 23787 map)->index_mask); 23788 insn_buf[2] = *insn; 23789 cnt = 3; 23790 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); 23791 if 
(!new_prog) 23792 return -ENOMEM; 23793 23794 delta += cnt - 1; 23795 env->prog = prog = new_prog; 23796 insn = new_prog->insnsi + i + delta; 23797 goto next_insn; 23798 } 23799 23800 if (insn->imm == BPF_FUNC_timer_set_callback) { 23801 /* The verifier will process callback_fn as many times as necessary 23802 * with different maps and the register states prepared by 23803 * set_timer_callback_state will be accurate. 23804 * 23805 * The following use case is valid: 23806 * map1 is shared by prog1, prog2, prog3. 23807 * prog1 calls bpf_timer_init for some map1 elements 23808 * prog2 calls bpf_timer_set_callback for some map1 elements. 23809 * Those that were not bpf_timer_init-ed will return -EINVAL. 23810 * prog3 calls bpf_timer_start for some map1 elements. 23811 * Those that were not both bpf_timer_init-ed and 23812 * bpf_timer_set_callback-ed will return -EINVAL. 23813 */ 23814 struct bpf_insn ld_addrs[2] = { 23815 BPF_LD_IMM64(BPF_REG_3, (long)prog->aux), 23816 }; 23817 23818 insn_buf[0] = ld_addrs[0]; 23819 insn_buf[1] = ld_addrs[1]; 23820 insn_buf[2] = *insn; 23821 cnt = 3; 23822 23823 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); 23824 if (!new_prog) 23825 return -ENOMEM; 23826 23827 delta += cnt - 1; 23828 env->prog = prog = new_prog; 23829 insn = new_prog->insnsi + i + delta; 23830 goto patch_call_imm; 23831 } 23832 23833 if (is_storage_get_function(insn->imm)) { 23834 if (env->insn_aux_data[i + delta].non_sleepable) 23835 insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_ATOMIC); 23836 else 23837 insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_KERNEL); 23838 insn_buf[1] = *insn; 23839 cnt = 2; 23840 23841 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); 23842 if (!new_prog) 23843 return -ENOMEM; 23844 23845 delta += cnt - 1; 23846 env->prog = prog = new_prog; 23847 insn = new_prog->insnsi + i + delta; 23848 goto patch_call_imm; 23849 } 23850 23851 /* bpf_per_cpu_ptr() and bpf_this_cpu_ptr() */ 23852 if 
(env->insn_aux_data[i + delta].call_with_percpu_alloc_ptr) { 23853 /* patch with 'r1 = *(u64 *)(r1 + 0)' since for percpu data, 23854 * bpf_mem_alloc() returns a ptr to the percpu data ptr. 23855 */ 23856 insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, 0); 23857 insn_buf[1] = *insn; 23858 cnt = 2; 23859 23860 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); 23861 if (!new_prog) 23862 return -ENOMEM; 23863 23864 delta += cnt - 1; 23865 env->prog = prog = new_prog; 23866 insn = new_prog->insnsi + i + delta; 23867 goto patch_call_imm; 23868 } 23869 23870 /* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup 23871 * and other inlining handlers are currently limited to 64 bit 23872 * only. 23873 */ 23874 if (prog->jit_requested && BITS_PER_LONG == 64 && 23875 (insn->imm == BPF_FUNC_map_lookup_elem || 23876 insn->imm == BPF_FUNC_map_update_elem || 23877 insn->imm == BPF_FUNC_map_delete_elem || 23878 insn->imm == BPF_FUNC_map_push_elem || 23879 insn->imm == BPF_FUNC_map_pop_elem || 23880 insn->imm == BPF_FUNC_map_peek_elem || 23881 insn->imm == BPF_FUNC_redirect_map || 23882 insn->imm == BPF_FUNC_for_each_map_elem || 23883 insn->imm == BPF_FUNC_map_lookup_percpu_elem)) { 23884 aux = &env->insn_aux_data[i + delta]; 23885 if (bpf_map_ptr_poisoned(aux)) 23886 goto patch_call_imm; 23887 23888 map_ptr = aux->map_ptr_state.map_ptr; 23889 ops = map_ptr->ops; 23890 if (insn->imm == BPF_FUNC_map_lookup_elem && 23891 ops->map_gen_lookup) { 23892 cnt = ops->map_gen_lookup(map_ptr, insn_buf); 23893 if (cnt == -EOPNOTSUPP) 23894 goto patch_map_ops_generic; 23895 if (cnt <= 0 || cnt >= INSN_BUF_SIZE) { 23896 verifier_bug(env, "%d insns generated for map lookup", cnt); 23897 return -EFAULT; 23898 } 23899 23900 new_prog = bpf_patch_insn_data(env, i + delta, 23901 insn_buf, cnt); 23902 if (!new_prog) 23903 return -ENOMEM; 23904 23905 delta += cnt - 1; 23906 env->prog = prog = new_prog; 23907 insn = new_prog->insnsi + i + delta; 23908 goto next_insn; 23909 } 
23910 23911 BUILD_BUG_ON(!__same_type(ops->map_lookup_elem, 23912 (void *(*)(struct bpf_map *map, void *key))NULL)); 23913 BUILD_BUG_ON(!__same_type(ops->map_delete_elem, 23914 (long (*)(struct bpf_map *map, void *key))NULL)); 23915 BUILD_BUG_ON(!__same_type(ops->map_update_elem, 23916 (long (*)(struct bpf_map *map, void *key, void *value, 23917 u64 flags))NULL)); 23918 BUILD_BUG_ON(!__same_type(ops->map_push_elem, 23919 (long (*)(struct bpf_map *map, void *value, 23920 u64 flags))NULL)); 23921 BUILD_BUG_ON(!__same_type(ops->map_pop_elem, 23922 (long (*)(struct bpf_map *map, void *value))NULL)); 23923 BUILD_BUG_ON(!__same_type(ops->map_peek_elem, 23924 (long (*)(struct bpf_map *map, void *value))NULL)); 23925 BUILD_BUG_ON(!__same_type(ops->map_redirect, 23926 (long (*)(struct bpf_map *map, u64 index, u64 flags))NULL)); 23927 BUILD_BUG_ON(!__same_type(ops->map_for_each_callback, 23928 (long (*)(struct bpf_map *map, 23929 bpf_callback_t callback_fn, 23930 void *callback_ctx, 23931 u64 flags))NULL)); 23932 BUILD_BUG_ON(!__same_type(ops->map_lookup_percpu_elem, 23933 (void *(*)(struct bpf_map *map, void *key, u32 cpu))NULL)); 23934 23935 patch_map_ops_generic: 23936 switch (insn->imm) { 23937 case BPF_FUNC_map_lookup_elem: 23938 insn->imm = BPF_CALL_IMM(ops->map_lookup_elem); 23939 goto next_insn; 23940 case BPF_FUNC_map_update_elem: 23941 insn->imm = BPF_CALL_IMM(ops->map_update_elem); 23942 goto next_insn; 23943 case BPF_FUNC_map_delete_elem: 23944 insn->imm = BPF_CALL_IMM(ops->map_delete_elem); 23945 goto next_insn; 23946 case BPF_FUNC_map_push_elem: 23947 insn->imm = BPF_CALL_IMM(ops->map_push_elem); 23948 goto next_insn; 23949 case BPF_FUNC_map_pop_elem: 23950 insn->imm = BPF_CALL_IMM(ops->map_pop_elem); 23951 goto next_insn; 23952 case BPF_FUNC_map_peek_elem: 23953 insn->imm = BPF_CALL_IMM(ops->map_peek_elem); 23954 goto next_insn; 23955 case BPF_FUNC_redirect_map: 23956 insn->imm = BPF_CALL_IMM(ops->map_redirect); 23957 goto next_insn; 23958 case 
BPF_FUNC_for_each_map_elem: 23959 insn->imm = BPF_CALL_IMM(ops->map_for_each_callback); 23960 goto next_insn; 23961 case BPF_FUNC_map_lookup_percpu_elem: 23962 insn->imm = BPF_CALL_IMM(ops->map_lookup_percpu_elem); 23963 goto next_insn; 23964 } 23965 23966 goto patch_call_imm; 23967 } 23968 23969 /* Implement bpf_jiffies64 inline. */ 23970 if (prog->jit_requested && BITS_PER_LONG == 64 && 23971 insn->imm == BPF_FUNC_jiffies64) { 23972 struct bpf_insn ld_jiffies_addr[2] = { 23973 BPF_LD_IMM64(BPF_REG_0, 23974 (unsigned long)&jiffies), 23975 }; 23976 23977 insn_buf[0] = ld_jiffies_addr[0]; 23978 insn_buf[1] = ld_jiffies_addr[1]; 23979 insn_buf[2] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, 23980 BPF_REG_0, 0); 23981 cnt = 3; 23982 23983 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, 23984 cnt); 23985 if (!new_prog) 23986 return -ENOMEM; 23987 23988 delta += cnt - 1; 23989 env->prog = prog = new_prog; 23990 insn = new_prog->insnsi + i + delta; 23991 goto next_insn; 23992 } 23993 23994 #if defined(CONFIG_X86_64) && !defined(CONFIG_UML) 23995 /* Implement bpf_get_smp_processor_id() inline. 
*/ 23996 if (insn->imm == BPF_FUNC_get_smp_processor_id && 23997 verifier_inlines_helper_call(env, insn->imm)) { 23998 /* BPF_FUNC_get_smp_processor_id inlining is an 23999 * optimization, so if cpu_number is ever 24000 * changed in some incompatible and hard to support 24001 * way, it's fine to back out this inlining logic 24002 */ 24003 #ifdef CONFIG_SMP 24004 insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, (u32)(unsigned long)&cpu_number); 24005 insn_buf[1] = BPF_MOV64_PERCPU_REG(BPF_REG_0, BPF_REG_0); 24006 insn_buf[2] = BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0); 24007 cnt = 3; 24008 #else 24009 insn_buf[0] = BPF_ALU32_REG(BPF_XOR, BPF_REG_0, BPF_REG_0); 24010 cnt = 1; 24011 #endif 24012 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); 24013 if (!new_prog) 24014 return -ENOMEM; 24015 24016 delta += cnt - 1; 24017 env->prog = prog = new_prog; 24018 insn = new_prog->insnsi + i + delta; 24019 goto next_insn; 24020 } 24021 24022 /* Implement bpf_get_current_task() and bpf_get_current_task_btf() inline. */ 24023 if ((insn->imm == BPF_FUNC_get_current_task || insn->imm == BPF_FUNC_get_current_task_btf) && 24024 verifier_inlines_helper_call(env, insn->imm)) { 24025 insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, (u32)(unsigned long)¤t_task); 24026 insn_buf[1] = BPF_MOV64_PERCPU_REG(BPF_REG_0, BPF_REG_0); 24027 insn_buf[2] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0); 24028 cnt = 3; 24029 24030 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); 24031 if (!new_prog) 24032 return -ENOMEM; 24033 24034 delta += cnt - 1; 24035 env->prog = prog = new_prog; 24036 insn = new_prog->insnsi + i + delta; 24037 goto next_insn; 24038 } 24039 #endif 24040 /* Implement bpf_get_func_arg inline. 
*/ 24041 if (prog_type == BPF_PROG_TYPE_TRACING && 24042 insn->imm == BPF_FUNC_get_func_arg) { 24043 if (eatype == BPF_TRACE_RAW_TP) { 24044 int nr_args = btf_type_vlen(prog->aux->attach_func_proto); 24045 24046 /* skip 'void *__data' in btf_trace_##name() and save to reg0 */ 24047 insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, nr_args - 1); 24048 cnt = 1; 24049 } else { 24050 /* Load nr_args from ctx - 8 */ 24051 insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8); 24052 insn_buf[1] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xFF); 24053 cnt = 2; 24054 } 24055 insn_buf[cnt++] = BPF_JMP32_REG(BPF_JGE, BPF_REG_2, BPF_REG_0, 6); 24056 insn_buf[cnt++] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 3); 24057 insn_buf[cnt++] = BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_1); 24058 insn_buf[cnt++] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 0); 24059 insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0); 24060 insn_buf[cnt++] = BPF_MOV64_IMM(BPF_REG_0, 0); 24061 insn_buf[cnt++] = BPF_JMP_A(1); 24062 insn_buf[cnt++] = BPF_MOV64_IMM(BPF_REG_0, -EINVAL); 24063 24064 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); 24065 if (!new_prog) 24066 return -ENOMEM; 24067 24068 delta += cnt - 1; 24069 env->prog = prog = new_prog; 24070 insn = new_prog->insnsi + i + delta; 24071 goto next_insn; 24072 } 24073 24074 /* Implement bpf_get_func_ret inline. 
*/ 24075 if (prog_type == BPF_PROG_TYPE_TRACING && 24076 insn->imm == BPF_FUNC_get_func_ret) { 24077 if (eatype == BPF_TRACE_FEXIT || 24078 eatype == BPF_TRACE_FSESSION || 24079 eatype == BPF_MODIFY_RETURN) { 24080 /* Load nr_args from ctx - 8 */ 24081 insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8); 24082 insn_buf[1] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xFF); 24083 insn_buf[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 3); 24084 insn_buf[3] = BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1); 24085 insn_buf[4] = BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0); 24086 insn_buf[5] = BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, 0); 24087 insn_buf[6] = BPF_MOV64_IMM(BPF_REG_0, 0); 24088 cnt = 7; 24089 } else { 24090 insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, -EOPNOTSUPP); 24091 cnt = 1; 24092 } 24093 24094 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); 24095 if (!new_prog) 24096 return -ENOMEM; 24097 24098 delta += cnt - 1; 24099 env->prog = prog = new_prog; 24100 insn = new_prog->insnsi + i + delta; 24101 goto next_insn; 24102 } 24103 24104 /* Implement get_func_arg_cnt inline. */ 24105 if (prog_type == BPF_PROG_TYPE_TRACING && 24106 insn->imm == BPF_FUNC_get_func_arg_cnt) { 24107 if (eatype == BPF_TRACE_RAW_TP) { 24108 int nr_args = btf_type_vlen(prog->aux->attach_func_proto); 24109 24110 /* skip 'void *__data' in btf_trace_##name() and save to reg0 */ 24111 insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, nr_args - 1); 24112 cnt = 1; 24113 } else { 24114 /* Load nr_args from ctx - 8 */ 24115 insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8); 24116 insn_buf[1] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xFF); 24117 cnt = 2; 24118 } 24119 24120 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); 24121 if (!new_prog) 24122 return -ENOMEM; 24123 24124 delta += cnt - 1; 24125 env->prog = prog = new_prog; 24126 insn = new_prog->insnsi + i + delta; 24127 goto next_insn; 24128 } 24129 24130 /* Implement bpf_get_func_ip inline. 
*/ 24131 if (prog_type == BPF_PROG_TYPE_TRACING && 24132 insn->imm == BPF_FUNC_get_func_ip) { 24133 /* Load IP address from ctx - 16 */ 24134 insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -16); 24135 24136 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, 1); 24137 if (!new_prog) 24138 return -ENOMEM; 24139 24140 env->prog = prog = new_prog; 24141 insn = new_prog->insnsi + i + delta; 24142 goto next_insn; 24143 } 24144 24145 /* Implement bpf_get_branch_snapshot inline. */ 24146 if (IS_ENABLED(CONFIG_PERF_EVENTS) && 24147 prog->jit_requested && BITS_PER_LONG == 64 && 24148 insn->imm == BPF_FUNC_get_branch_snapshot) { 24149 /* We are dealing with the following func protos: 24150 * u64 bpf_get_branch_snapshot(void *buf, u32 size, u64 flags); 24151 * int perf_snapshot_branch_stack(struct perf_branch_entry *entries, u32 cnt); 24152 */ 24153 const u32 br_entry_size = sizeof(struct perf_branch_entry); 24154 24155 /* struct perf_branch_entry is part of UAPI and is 24156 * used as an array element, so extremely unlikely to 24157 * ever grow or shrink 24158 */ 24159 BUILD_BUG_ON(br_entry_size != 24); 24160 24161 /* if (unlikely(flags)) return -EINVAL */ 24162 insn_buf[0] = BPF_JMP_IMM(BPF_JNE, BPF_REG_3, 0, 7); 24163 24164 /* Transform size (bytes) into number of entries (cnt = size / 24). 24165 * But to avoid expensive division instruction, we implement 24166 * divide-by-3 through multiplication, followed by further 24167 * division by 8 through 3-bit right shift. 24168 * Refer to book "Hacker's Delight, 2nd ed." by Henry S. Warren, Jr., 24169 * p. 227, chapter "Unsigned Division by 3" for details and proofs. 24170 * 24171 * N / 3 <=> M * N / 2^33, where M = (2^33 + 1) / 3 = 0xaaaaaaab. 
24172 */ 24173 insn_buf[1] = BPF_MOV32_IMM(BPF_REG_0, 0xaaaaaaab); 24174 insn_buf[2] = BPF_ALU64_REG(BPF_MUL, BPF_REG_2, BPF_REG_0); 24175 insn_buf[3] = BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 36); 24176 24177 /* call perf_snapshot_branch_stack implementation */ 24178 insn_buf[4] = BPF_EMIT_CALL(static_call_query(perf_snapshot_branch_stack)); 24179 /* if (entry_cnt == 0) return -ENOENT */ 24180 insn_buf[5] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4); 24181 /* return entry_cnt * sizeof(struct perf_branch_entry) */ 24182 insn_buf[6] = BPF_ALU32_IMM(BPF_MUL, BPF_REG_0, br_entry_size); 24183 insn_buf[7] = BPF_JMP_A(3); 24184 /* return -EINVAL; */ 24185 insn_buf[8] = BPF_MOV64_IMM(BPF_REG_0, -EINVAL); 24186 insn_buf[9] = BPF_JMP_A(1); 24187 /* return -ENOENT; */ 24188 insn_buf[10] = BPF_MOV64_IMM(BPF_REG_0, -ENOENT); 24189 cnt = 11; 24190 24191 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); 24192 if (!new_prog) 24193 return -ENOMEM; 24194 24195 delta += cnt - 1; 24196 env->prog = prog = new_prog; 24197 insn = new_prog->insnsi + i + delta; 24198 goto next_insn; 24199 } 24200 24201 /* Implement bpf_kptr_xchg inline */ 24202 if (prog->jit_requested && BITS_PER_LONG == 64 && 24203 insn->imm == BPF_FUNC_kptr_xchg && 24204 bpf_jit_supports_ptr_xchg()) { 24205 insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_2); 24206 insn_buf[1] = BPF_ATOMIC_OP(BPF_DW, BPF_XCHG, BPF_REG_1, BPF_REG_0, 0); 24207 cnt = 2; 24208 24209 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); 24210 if (!new_prog) 24211 return -ENOMEM; 24212 24213 delta += cnt - 1; 24214 env->prog = prog = new_prog; 24215 insn = new_prog->insnsi + i + delta; 24216 goto next_insn; 24217 } 24218 patch_call_imm: 24219 fn = env->ops->get_func_proto(insn->imm, env->prog); 24220 /* all functions that have prototype and verifier allowed 24221 * programs to call them, must be real in-kernel functions 24222 */ 24223 if (!fn->func) { 24224 verifier_bug(env, 24225 "not inlined functions %s#%d is missing func", 24226 
func_id_name(insn->imm), insn->imm); 24227 return -EFAULT; 24228 } 24229 insn->imm = fn->func - __bpf_call_base; 24230 next_insn: 24231 if (subprogs[cur_subprog + 1].start == i + delta + 1) { 24232 subprogs[cur_subprog].stack_depth += stack_depth_extra; 24233 subprogs[cur_subprog].stack_extra = stack_depth_extra; 24234 24235 stack_depth = subprogs[cur_subprog].stack_depth; 24236 if (stack_depth > MAX_BPF_STACK && !prog->jit_requested) { 24237 verbose(env, "stack size %d(extra %d) is too large\n", 24238 stack_depth, stack_depth_extra); 24239 return -EINVAL; 24240 } 24241 cur_subprog++; 24242 stack_depth = subprogs[cur_subprog].stack_depth; 24243 stack_depth_extra = 0; 24244 } 24245 i++; 24246 insn++; 24247 } 24248 24249 env->prog->aux->stack_depth = subprogs[0].stack_depth; 24250 for (i = 0; i < env->subprog_cnt; i++) { 24251 int delta = bpf_jit_supports_timed_may_goto() ? 2 : 1; 24252 int subprog_start = subprogs[i].start; 24253 int stack_slots = subprogs[i].stack_extra / 8; 24254 int slots = delta, cnt = 0; 24255 24256 if (!stack_slots) 24257 continue; 24258 /* We need two slots in case timed may_goto is supported. 
*/ 24259 if (stack_slots > slots) { 24260 verifier_bug(env, "stack_slots supports may_goto only"); 24261 return -EFAULT; 24262 } 24263 24264 stack_depth = subprogs[i].stack_depth; 24265 if (bpf_jit_supports_timed_may_goto()) { 24266 insn_buf[cnt++] = BPF_ST_MEM(BPF_DW, BPF_REG_FP, -stack_depth, 24267 BPF_MAX_TIMED_LOOPS); 24268 insn_buf[cnt++] = BPF_ST_MEM(BPF_DW, BPF_REG_FP, -stack_depth + 8, 0); 24269 } else { 24270 /* Add ST insn to subprog prologue to init extra stack */ 24271 insn_buf[cnt++] = BPF_ST_MEM(BPF_DW, BPF_REG_FP, -stack_depth, 24272 BPF_MAX_LOOPS); 24273 } 24274 /* Copy first actual insn to preserve it */ 24275 insn_buf[cnt++] = env->prog->insnsi[subprog_start]; 24276 24277 new_prog = bpf_patch_insn_data(env, subprog_start, insn_buf, cnt); 24278 if (!new_prog) 24279 return -ENOMEM; 24280 env->prog = prog = new_prog; 24281 /* 24282 * If may_goto is a first insn of a prog there could be a jmp 24283 * insn that points to it, hence adjust all such jmps to point 24284 * to insn after BPF_ST that inits may_goto count. 24285 * Adjustment will succeed because bpf_patch_insn_data() didn't fail. 24286 */ 24287 WARN_ON(adjust_jmp_off(env->prog, subprog_start, delta)); 24288 } 24289 24290 /* Since poke tab is now finalized, publish aux to tracker. 
*/ 24291 for (i = 0; i < prog->aux->size_poke_tab; i++) { 24292 map_ptr = prog->aux->poke_tab[i].tail_call.map; 24293 if (!map_ptr->ops->map_poke_track || 24294 !map_ptr->ops->map_poke_untrack || 24295 !map_ptr->ops->map_poke_run) { 24296 verifier_bug(env, "poke tab is misconfigured"); 24297 return -EFAULT; 24298 } 24299 24300 ret = map_ptr->ops->map_poke_track(map_ptr, prog->aux); 24301 if (ret < 0) { 24302 verbose(env, "tracking tail call prog failed\n"); 24303 return ret; 24304 } 24305 } 24306 24307 ret = sort_kfunc_descs_by_imm_off(env); 24308 if (ret) 24309 return ret; 24310 24311 return 0; 24312 } 24313 24314 static struct bpf_prog *inline_bpf_loop(struct bpf_verifier_env *env, 24315 int position, 24316 s32 stack_base, 24317 u32 callback_subprogno, 24318 u32 *total_cnt) 24319 { 24320 s32 r6_offset = stack_base + 0 * BPF_REG_SIZE; 24321 s32 r7_offset = stack_base + 1 * BPF_REG_SIZE; 24322 s32 r8_offset = stack_base + 2 * BPF_REG_SIZE; 24323 int reg_loop_max = BPF_REG_6; 24324 int reg_loop_cnt = BPF_REG_7; 24325 int reg_loop_ctx = BPF_REG_8; 24326 24327 struct bpf_insn *insn_buf = env->insn_buf; 24328 struct bpf_prog *new_prog; 24329 u32 callback_start; 24330 u32 call_insn_offset; 24331 s32 callback_offset; 24332 u32 cnt = 0; 24333 24334 /* This represents an inlined version of bpf_iter.c:bpf_loop, 24335 * be careful to modify this code in sync. 24336 */ 24337 24338 /* Return error and jump to the end of the patch if 24339 * expected number of iterations is too big. 
24340 */ 24341 insn_buf[cnt++] = BPF_JMP_IMM(BPF_JLE, BPF_REG_1, BPF_MAX_LOOPS, 2); 24342 insn_buf[cnt++] = BPF_MOV32_IMM(BPF_REG_0, -E2BIG); 24343 insn_buf[cnt++] = BPF_JMP_IMM(BPF_JA, 0, 0, 16); 24344 /* spill R6, R7, R8 to use these as loop vars */ 24345 insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, r6_offset); 24346 insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, r7_offset); 24347 insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, r8_offset); 24348 /* initialize loop vars */ 24349 insn_buf[cnt++] = BPF_MOV64_REG(reg_loop_max, BPF_REG_1); 24350 insn_buf[cnt++] = BPF_MOV32_IMM(reg_loop_cnt, 0); 24351 insn_buf[cnt++] = BPF_MOV64_REG(reg_loop_ctx, BPF_REG_3); 24352 /* loop header, 24353 * if reg_loop_cnt >= reg_loop_max skip the loop body 24354 */ 24355 insn_buf[cnt++] = BPF_JMP_REG(BPF_JGE, reg_loop_cnt, reg_loop_max, 5); 24356 /* callback call, 24357 * correct callback offset would be set after patching 24358 */ 24359 insn_buf[cnt++] = BPF_MOV64_REG(BPF_REG_1, reg_loop_cnt); 24360 insn_buf[cnt++] = BPF_MOV64_REG(BPF_REG_2, reg_loop_ctx); 24361 insn_buf[cnt++] = BPF_CALL_REL(0); 24362 /* increment loop counter */ 24363 insn_buf[cnt++] = BPF_ALU64_IMM(BPF_ADD, reg_loop_cnt, 1); 24364 /* jump to loop header if callback returned 0 */ 24365 insn_buf[cnt++] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -6); 24366 /* return value of bpf_loop, 24367 * set R0 to the number of iterations 24368 */ 24369 insn_buf[cnt++] = BPF_MOV64_REG(BPF_REG_0, reg_loop_cnt); 24370 /* restore original values of R6, R7, R8 */ 24371 insn_buf[cnt++] = BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_10, r6_offset); 24372 insn_buf[cnt++] = BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_10, r7_offset); 24373 insn_buf[cnt++] = BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_10, r8_offset); 24374 24375 *total_cnt = cnt; 24376 new_prog = bpf_patch_insn_data(env, position, insn_buf, cnt); 24377 if (!new_prog) 24378 return new_prog; 24379 24380 /* callback start is known only after patching */ 
24381 callback_start = env->subprog_info[callback_subprogno].start; 24382 /* Note: insn_buf[12] is an offset of BPF_CALL_REL instruction */ 24383 call_insn_offset = position + 12; 24384 callback_offset = callback_start - call_insn_offset - 1; 24385 new_prog->insnsi[call_insn_offset].imm = callback_offset; 24386 24387 return new_prog; 24388 } 24389 24390 static bool is_bpf_loop_call(struct bpf_insn *insn) 24391 { 24392 return insn->code == (BPF_JMP | BPF_CALL) && 24393 insn->src_reg == 0 && 24394 insn->imm == BPF_FUNC_loop; 24395 } 24396 24397 /* For all sub-programs in the program (including main) check 24398 * insn_aux_data to see if there are bpf_loop calls that require 24399 * inlining. If such calls are found the calls are replaced with a 24400 * sequence of instructions produced by `inline_bpf_loop` function and 24401 * subprog stack_depth is increased by the size of 3 registers. 24402 * This stack space is used to spill values of the R6, R7, R8. These 24403 * registers are used to store the loop bound, counter and context 24404 * variables. 
24405 */ 24406 static int optimize_bpf_loop(struct bpf_verifier_env *env) 24407 { 24408 struct bpf_subprog_info *subprogs = env->subprog_info; 24409 int i, cur_subprog = 0, cnt, delta = 0; 24410 struct bpf_insn *insn = env->prog->insnsi; 24411 int insn_cnt = env->prog->len; 24412 u16 stack_depth = subprogs[cur_subprog].stack_depth; 24413 u16 stack_depth_roundup = round_up(stack_depth, 8) - stack_depth; 24414 u16 stack_depth_extra = 0; 24415 24416 for (i = 0; i < insn_cnt; i++, insn++) { 24417 struct bpf_loop_inline_state *inline_state = 24418 &env->insn_aux_data[i + delta].loop_inline_state; 24419 24420 if (is_bpf_loop_call(insn) && inline_state->fit_for_inline) { 24421 struct bpf_prog *new_prog; 24422 24423 stack_depth_extra = BPF_REG_SIZE * 3 + stack_depth_roundup; 24424 new_prog = inline_bpf_loop(env, 24425 i + delta, 24426 -(stack_depth + stack_depth_extra), 24427 inline_state->callback_subprogno, 24428 &cnt); 24429 if (!new_prog) 24430 return -ENOMEM; 24431 24432 delta += cnt - 1; 24433 env->prog = new_prog; 24434 insn = new_prog->insnsi + i + delta; 24435 } 24436 24437 if (subprogs[cur_subprog + 1].start == i + delta + 1) { 24438 subprogs[cur_subprog].stack_depth += stack_depth_extra; 24439 cur_subprog++; 24440 stack_depth = subprogs[cur_subprog].stack_depth; 24441 stack_depth_roundup = round_up(stack_depth, 8) - stack_depth; 24442 stack_depth_extra = 0; 24443 } 24444 } 24445 24446 env->prog->aux->stack_depth = env->subprog_info[0].stack_depth; 24447 24448 return 0; 24449 } 24450 24451 /* Remove unnecessary spill/fill pairs, members of fastcall pattern, 24452 * adjust subprograms stack depth when possible. 
24453 */ 24454 static int remove_fastcall_spills_fills(struct bpf_verifier_env *env) 24455 { 24456 struct bpf_subprog_info *subprog = env->subprog_info; 24457 struct bpf_insn_aux_data *aux = env->insn_aux_data; 24458 struct bpf_insn *insn = env->prog->insnsi; 24459 int insn_cnt = env->prog->len; 24460 u32 spills_num; 24461 bool modified = false; 24462 int i, j; 24463 24464 for (i = 0; i < insn_cnt; i++, insn++) { 24465 if (aux[i].fastcall_spills_num > 0) { 24466 spills_num = aux[i].fastcall_spills_num; 24467 /* NOPs would be removed by opt_remove_nops() */ 24468 for (j = 1; j <= spills_num; ++j) { 24469 *(insn - j) = NOP; 24470 *(insn + j) = NOP; 24471 } 24472 modified = true; 24473 } 24474 if ((subprog + 1)->start == i + 1) { 24475 if (modified && !subprog->keep_fastcall_stack) 24476 subprog->stack_depth = -subprog->fastcall_stack_off; 24477 subprog++; 24478 modified = false; 24479 } 24480 } 24481 24482 return 0; 24483 } 24484 24485 static void free_states(struct bpf_verifier_env *env) 24486 { 24487 struct bpf_verifier_state_list *sl; 24488 struct list_head *head, *pos, *tmp; 24489 struct bpf_scc_info *info; 24490 int i, j; 24491 24492 free_verifier_state(env->cur_state, true); 24493 env->cur_state = NULL; 24494 while (!pop_stack(env, NULL, NULL, false)); 24495 24496 list_for_each_safe(pos, tmp, &env->free_list) { 24497 sl = container_of(pos, struct bpf_verifier_state_list, node); 24498 free_verifier_state(&sl->state, false); 24499 kfree(sl); 24500 } 24501 INIT_LIST_HEAD(&env->free_list); 24502 24503 for (i = 0; i < env->scc_cnt; ++i) { 24504 info = env->scc_info[i]; 24505 if (!info) 24506 continue; 24507 for (j = 0; j < info->num_visits; j++) 24508 free_backedges(&info->visits[j]); 24509 kvfree(info); 24510 env->scc_info[i] = NULL; 24511 } 24512 24513 if (!env->explored_states) 24514 return; 24515 24516 for (i = 0; i < state_htab_size(env); i++) { 24517 head = &env->explored_states[i]; 24518 24519 list_for_each_safe(pos, tmp, head) { 24520 sl = container_of(pos, 
struct bpf_verifier_state_list, node); 24521 free_verifier_state(&sl->state, false); 24522 kfree(sl); 24523 } 24524 INIT_LIST_HEAD(&env->explored_states[i]); 24525 } 24526 } 24527 24528 static int do_check_common(struct bpf_verifier_env *env, int subprog) 24529 { 24530 bool pop_log = !(env->log.level & BPF_LOG_LEVEL2); 24531 struct bpf_subprog_info *sub = subprog_info(env, subprog); 24532 struct bpf_prog_aux *aux = env->prog->aux; 24533 struct bpf_verifier_state *state; 24534 struct bpf_reg_state *regs; 24535 int ret, i; 24536 24537 env->prev_linfo = NULL; 24538 env->pass_cnt++; 24539 24540 state = kzalloc_obj(struct bpf_verifier_state, GFP_KERNEL_ACCOUNT); 24541 if (!state) 24542 return -ENOMEM; 24543 state->curframe = 0; 24544 state->speculative = false; 24545 state->branches = 1; 24546 state->in_sleepable = env->prog->sleepable; 24547 state->frame[0] = kzalloc_obj(struct bpf_func_state, GFP_KERNEL_ACCOUNT); 24548 if (!state->frame[0]) { 24549 kfree(state); 24550 return -ENOMEM; 24551 } 24552 env->cur_state = state; 24553 init_func_state(env, state->frame[0], 24554 BPF_MAIN_FUNC /* callsite */, 24555 0 /* frameno */, 24556 subprog); 24557 state->first_insn_idx = env->subprog_info[subprog].start; 24558 state->last_insn_idx = -1; 24559 24560 regs = state->frame[state->curframe]->regs; 24561 if (subprog || env->prog->type == BPF_PROG_TYPE_EXT) { 24562 const char *sub_name = subprog_name(env, subprog); 24563 struct bpf_subprog_arg_info *arg; 24564 struct bpf_reg_state *reg; 24565 24566 if (env->log.level & BPF_LOG_LEVEL) 24567 verbose(env, "Validating %s() func#%d...\n", sub_name, subprog); 24568 ret = btf_prepare_func_args(env, subprog); 24569 if (ret) 24570 goto out; 24571 24572 if (subprog_is_exc_cb(env, subprog)) { 24573 state->frame[0]->in_exception_callback_fn = true; 24574 /* We have already ensured that the callback returns an integer, just 24575 * like all global subprogs. We need to determine it only has a single 24576 * scalar argument. 
24577 */ 24578 if (sub->arg_cnt != 1 || sub->args[0].arg_type != ARG_ANYTHING) { 24579 verbose(env, "exception cb only supports single integer argument\n"); 24580 ret = -EINVAL; 24581 goto out; 24582 } 24583 } 24584 for (i = BPF_REG_1; i <= sub->arg_cnt; i++) { 24585 arg = &sub->args[i - BPF_REG_1]; 24586 reg = ®s[i]; 24587 24588 if (arg->arg_type == ARG_PTR_TO_CTX) { 24589 reg->type = PTR_TO_CTX; 24590 mark_reg_known_zero(env, regs, i); 24591 } else if (arg->arg_type == ARG_ANYTHING) { 24592 reg->type = SCALAR_VALUE; 24593 mark_reg_unknown(env, regs, i); 24594 } else if (arg->arg_type == (ARG_PTR_TO_DYNPTR | MEM_RDONLY)) { 24595 /* assume unspecial LOCAL dynptr type */ 24596 __mark_dynptr_reg(reg, BPF_DYNPTR_TYPE_LOCAL, true, ++env->id_gen); 24597 } else if (base_type(arg->arg_type) == ARG_PTR_TO_MEM) { 24598 reg->type = PTR_TO_MEM; 24599 reg->type |= arg->arg_type & 24600 (PTR_MAYBE_NULL | PTR_UNTRUSTED | MEM_RDONLY); 24601 mark_reg_known_zero(env, regs, i); 24602 reg->mem_size = arg->mem_size; 24603 if (arg->arg_type & PTR_MAYBE_NULL) 24604 reg->id = ++env->id_gen; 24605 } else if (base_type(arg->arg_type) == ARG_PTR_TO_BTF_ID) { 24606 reg->type = PTR_TO_BTF_ID; 24607 if (arg->arg_type & PTR_MAYBE_NULL) 24608 reg->type |= PTR_MAYBE_NULL; 24609 if (arg->arg_type & PTR_UNTRUSTED) 24610 reg->type |= PTR_UNTRUSTED; 24611 if (arg->arg_type & PTR_TRUSTED) 24612 reg->type |= PTR_TRUSTED; 24613 mark_reg_known_zero(env, regs, i); 24614 reg->btf = bpf_get_btf_vmlinux(); /* can't fail at this point */ 24615 reg->btf_id = arg->btf_id; 24616 reg->id = ++env->id_gen; 24617 } else if (base_type(arg->arg_type) == ARG_PTR_TO_ARENA) { 24618 /* caller can pass either PTR_TO_ARENA or SCALAR */ 24619 mark_reg_unknown(env, regs, i); 24620 } else { 24621 verifier_bug(env, "unhandled arg#%d type %d", 24622 i - BPF_REG_1, arg->arg_type); 24623 ret = -EFAULT; 24624 goto out; 24625 } 24626 } 24627 } else { 24628 /* if main BPF program has associated BTF info, validate that 24629 * it's 
matching expected signature, and otherwise mark BTF 24630 * info for main program as unreliable 24631 */ 24632 if (env->prog->aux->func_info_aux) { 24633 ret = btf_prepare_func_args(env, 0); 24634 if (ret || sub->arg_cnt != 1 || sub->args[0].arg_type != ARG_PTR_TO_CTX) 24635 env->prog->aux->func_info_aux[0].unreliable = true; 24636 } 24637 24638 /* 1st arg to a function */ 24639 regs[BPF_REG_1].type = PTR_TO_CTX; 24640 mark_reg_known_zero(env, regs, BPF_REG_1); 24641 } 24642 24643 /* Acquire references for struct_ops program arguments tagged with "__ref" */ 24644 if (!subprog && env->prog->type == BPF_PROG_TYPE_STRUCT_OPS) { 24645 for (i = 0; i < aux->ctx_arg_info_size; i++) 24646 aux->ctx_arg_info[i].ref_obj_id = aux->ctx_arg_info[i].refcounted ? 24647 acquire_reference(env, 0) : 0; 24648 } 24649 24650 ret = do_check(env); 24651 out: 24652 if (!ret && pop_log) 24653 bpf_vlog_reset(&env->log, 0); 24654 free_states(env); 24655 return ret; 24656 } 24657 24658 /* Lazily verify all global functions based on their BTF, if they are called 24659 * from main BPF program or any of subprograms transitively. 24660 * BPF global subprogs called from dead code are not validated. 24661 * All callable global functions must pass verification. 24662 * Otherwise the whole program is rejected. 24663 * Consider: 24664 * int bar(int); 24665 * int foo(int f) 24666 * { 24667 * return bar(f); 24668 * } 24669 * int bar(int b) 24670 * { 24671 * ... 24672 * } 24673 * foo() will be verified first for R1=any_scalar_value. During verification it 24674 * will be assumed that bar() already verified successfully and call to bar() 24675 * from foo() will be checked for type match only. Later bar() will be verified 24676 * independently to check that it's safe for R1=any_scalar_value. 
24677 */ 24678 static int do_check_subprogs(struct bpf_verifier_env *env) 24679 { 24680 struct bpf_prog_aux *aux = env->prog->aux; 24681 struct bpf_func_info_aux *sub_aux; 24682 int i, ret, new_cnt; 24683 24684 if (!aux->func_info) 24685 return 0; 24686 24687 /* exception callback is presumed to be always called */ 24688 if (env->exception_callback_subprog) 24689 subprog_aux(env, env->exception_callback_subprog)->called = true; 24690 24691 again: 24692 new_cnt = 0; 24693 for (i = 1; i < env->subprog_cnt; i++) { 24694 if (!subprog_is_global(env, i)) 24695 continue; 24696 24697 sub_aux = subprog_aux(env, i); 24698 if (!sub_aux->called || sub_aux->verified) 24699 continue; 24700 24701 env->insn_idx = env->subprog_info[i].start; 24702 WARN_ON_ONCE(env->insn_idx == 0); 24703 ret = do_check_common(env, i); 24704 if (ret) { 24705 return ret; 24706 } else if (env->log.level & BPF_LOG_LEVEL) { 24707 verbose(env, "Func#%d ('%s') is safe for any args that match its prototype\n", 24708 i, subprog_name(env, i)); 24709 } 24710 24711 /* We verified new global subprog, it might have called some 24712 * more global subprogs that we haven't verified yet, so we 24713 * need to do another pass over subprogs to verify those. 24714 */ 24715 sub_aux->verified = true; 24716 new_cnt++; 24717 } 24718 24719 /* We can't loop forever as we verify at least one global subprog on 24720 * each pass. 
24721 */ 24722 if (new_cnt) 24723 goto again; 24724 24725 return 0; 24726 } 24727 24728 static int do_check_main(struct bpf_verifier_env *env) 24729 { 24730 int ret; 24731 24732 env->insn_idx = 0; 24733 ret = do_check_common(env, 0); 24734 if (!ret) 24735 env->prog->aux->stack_depth = env->subprog_info[0].stack_depth; 24736 return ret; 24737 } 24738 24739 24740 static void print_verification_stats(struct bpf_verifier_env *env) 24741 { 24742 int i; 24743 24744 if (env->log.level & BPF_LOG_STATS) { 24745 verbose(env, "verification time %lld usec\n", 24746 div_u64(env->verification_time, 1000)); 24747 verbose(env, "stack depth "); 24748 for (i = 0; i < env->subprog_cnt; i++) { 24749 u32 depth = env->subprog_info[i].stack_depth; 24750 24751 verbose(env, "%d", depth); 24752 if (i + 1 < env->subprog_cnt) 24753 verbose(env, "+"); 24754 } 24755 verbose(env, "\n"); 24756 } 24757 verbose(env, "processed %d insns (limit %d) max_states_per_insn %d " 24758 "total_states %d peak_states %d mark_read %d\n", 24759 env->insn_processed, BPF_COMPLEXITY_LIMIT_INSNS, 24760 env->max_states_per_insn, env->total_states, 24761 env->peak_states, env->longest_mark_read_walk); 24762 } 24763 24764 int bpf_prog_ctx_arg_info_init(struct bpf_prog *prog, 24765 const struct bpf_ctx_arg_aux *info, u32 cnt) 24766 { 24767 prog->aux->ctx_arg_info = kmemdup_array(info, cnt, sizeof(*info), GFP_KERNEL_ACCOUNT); 24768 prog->aux->ctx_arg_info_size = cnt; 24769 24770 return prog->aux->ctx_arg_info ? 
0 : -ENOMEM; 24771 } 24772 24773 static int check_struct_ops_btf_id(struct bpf_verifier_env *env) 24774 { 24775 const struct btf_type *t, *func_proto; 24776 const struct bpf_struct_ops_desc *st_ops_desc; 24777 const struct bpf_struct_ops *st_ops; 24778 const struct btf_member *member; 24779 struct bpf_prog *prog = env->prog; 24780 bool has_refcounted_arg = false; 24781 u32 btf_id, member_idx, member_off; 24782 struct btf *btf; 24783 const char *mname; 24784 int i, err; 24785 24786 if (!prog->gpl_compatible) { 24787 verbose(env, "struct ops programs must have a GPL compatible license\n"); 24788 return -EINVAL; 24789 } 24790 24791 if (!prog->aux->attach_btf_id) 24792 return -ENOTSUPP; 24793 24794 btf = prog->aux->attach_btf; 24795 if (btf_is_module(btf)) { 24796 /* Make sure st_ops is valid through the lifetime of env */ 24797 env->attach_btf_mod = btf_try_get_module(btf); 24798 if (!env->attach_btf_mod) { 24799 verbose(env, "struct_ops module %s is not found\n", 24800 btf_get_name(btf)); 24801 return -ENOTSUPP; 24802 } 24803 } 24804 24805 btf_id = prog->aux->attach_btf_id; 24806 st_ops_desc = bpf_struct_ops_find(btf, btf_id); 24807 if (!st_ops_desc) { 24808 verbose(env, "attach_btf_id %u is not a supported struct\n", 24809 btf_id); 24810 return -ENOTSUPP; 24811 } 24812 st_ops = st_ops_desc->st_ops; 24813 24814 t = st_ops_desc->type; 24815 member_idx = prog->expected_attach_type; 24816 if (member_idx >= btf_type_vlen(t)) { 24817 verbose(env, "attach to invalid member idx %u of struct %s\n", 24818 member_idx, st_ops->name); 24819 return -EINVAL; 24820 } 24821 24822 member = &btf_type_member(t)[member_idx]; 24823 mname = btf_name_by_offset(btf, member->name_off); 24824 func_proto = btf_type_resolve_func_ptr(btf, member->type, 24825 NULL); 24826 if (!func_proto) { 24827 verbose(env, "attach to invalid member %s(@idx %u) of struct %s\n", 24828 mname, member_idx, st_ops->name); 24829 return -EINVAL; 24830 } 24831 24832 member_off = __btf_member_bit_offset(t, member) / 8; 
24833 err = bpf_struct_ops_supported(st_ops, member_off); 24834 if (err) { 24835 verbose(env, "attach to unsupported member %s of struct %s\n", 24836 mname, st_ops->name); 24837 return err; 24838 } 24839 24840 if (st_ops->check_member) { 24841 err = st_ops->check_member(t, member, prog); 24842 24843 if (err) { 24844 verbose(env, "attach to unsupported member %s of struct %s\n", 24845 mname, st_ops->name); 24846 return err; 24847 } 24848 } 24849 24850 if (prog->aux->priv_stack_requested && !bpf_jit_supports_private_stack()) { 24851 verbose(env, "Private stack not supported by jit\n"); 24852 return -EACCES; 24853 } 24854 24855 for (i = 0; i < st_ops_desc->arg_info[member_idx].cnt; i++) { 24856 if (st_ops_desc->arg_info[member_idx].info->refcounted) { 24857 has_refcounted_arg = true; 24858 break; 24859 } 24860 } 24861 24862 /* Tail call is not allowed for programs with refcounted arguments since we 24863 * cannot guarantee that valid refcounted kptrs will be passed to the callee. 24864 */ 24865 for (i = 0; i < env->subprog_cnt; i++) { 24866 if (has_refcounted_arg && env->subprog_info[i].has_tail_call) { 24867 verbose(env, "program with __ref argument cannot tail call\n"); 24868 return -EINVAL; 24869 } 24870 } 24871 24872 prog->aux->st_ops = st_ops; 24873 prog->aux->attach_st_ops_member_off = member_off; 24874 24875 prog->aux->attach_func_proto = func_proto; 24876 prog->aux->attach_func_name = mname; 24877 env->ops = st_ops->verifier_ops; 24878 24879 return bpf_prog_ctx_arg_info_init(prog, st_ops_desc->arg_info[member_idx].info, 24880 st_ops_desc->arg_info[member_idx].cnt); 24881 } 24882 #define SECURITY_PREFIX "security_" 24883 24884 static int check_attach_modify_return(unsigned long addr, const char *func_name) 24885 { 24886 if (within_error_injection_list(addr) || 24887 !strncmp(SECURITY_PREFIX, func_name, sizeof(SECURITY_PREFIX) - 1)) 24888 return 0; 24889 24890 return -EINVAL; 24891 } 24892 24893 /* list of non-sleepable functions that are otherwise on 24894 * 
ALLOW_ERROR_INJECTION list 24895 */ 24896 BTF_SET_START(btf_non_sleepable_error_inject) 24897 /* Three functions below can be called from sleepable and non-sleepable context. 24898 * Assume non-sleepable from bpf safety point of view. 24899 */ 24900 BTF_ID(func, __filemap_add_folio) 24901 #ifdef CONFIG_FAIL_PAGE_ALLOC 24902 BTF_ID(func, should_fail_alloc_page) 24903 #endif 24904 #ifdef CONFIG_FAILSLAB 24905 BTF_ID(func, should_failslab) 24906 #endif 24907 BTF_SET_END(btf_non_sleepable_error_inject) 24908 24909 static int check_non_sleepable_error_inject(u32 btf_id) 24910 { 24911 return btf_id_set_contains(&btf_non_sleepable_error_inject, btf_id); 24912 } 24913 24914 int bpf_check_attach_target(struct bpf_verifier_log *log, 24915 const struct bpf_prog *prog, 24916 const struct bpf_prog *tgt_prog, 24917 u32 btf_id, 24918 struct bpf_attach_target_info *tgt_info) 24919 { 24920 bool prog_extension = prog->type == BPF_PROG_TYPE_EXT; 24921 bool prog_tracing = prog->type == BPF_PROG_TYPE_TRACING; 24922 char trace_symbol[KSYM_SYMBOL_LEN]; 24923 const char prefix[] = "btf_trace_"; 24924 struct bpf_raw_event_map *btp; 24925 int ret = 0, subprog = -1, i; 24926 const struct btf_type *t; 24927 bool conservative = true; 24928 const char *tname, *fname; 24929 struct btf *btf; 24930 long addr = 0; 24931 struct module *mod = NULL; 24932 24933 if (!btf_id) { 24934 bpf_log(log, "Tracing programs must provide btf_id\n"); 24935 return -EINVAL; 24936 } 24937 btf = tgt_prog ? 
tgt_prog->aux->btf : prog->aux->attach_btf; 24938 if (!btf) { 24939 bpf_log(log, 24940 "FENTRY/FEXIT program can only be attached to another program annotated with BTF\n"); 24941 return -EINVAL; 24942 } 24943 t = btf_type_by_id(btf, btf_id); 24944 if (!t) { 24945 bpf_log(log, "attach_btf_id %u is invalid\n", btf_id); 24946 return -EINVAL; 24947 } 24948 tname = btf_name_by_offset(btf, t->name_off); 24949 if (!tname) { 24950 bpf_log(log, "attach_btf_id %u doesn't have a name\n", btf_id); 24951 return -EINVAL; 24952 } 24953 if (tgt_prog) { 24954 struct bpf_prog_aux *aux = tgt_prog->aux; 24955 bool tgt_changes_pkt_data; 24956 bool tgt_might_sleep; 24957 24958 if (bpf_prog_is_dev_bound(prog->aux) && 24959 !bpf_prog_dev_bound_match(prog, tgt_prog)) { 24960 bpf_log(log, "Target program bound device mismatch"); 24961 return -EINVAL; 24962 } 24963 24964 for (i = 0; i < aux->func_info_cnt; i++) 24965 if (aux->func_info[i].type_id == btf_id) { 24966 subprog = i; 24967 break; 24968 } 24969 if (subprog == -1) { 24970 bpf_log(log, "Subprog %s doesn't exist\n", tname); 24971 return -EINVAL; 24972 } 24973 if (aux->func && aux->func[subprog]->aux->exception_cb) { 24974 bpf_log(log, 24975 "%s programs cannot attach to exception callback\n", 24976 prog_extension ? "Extension" : "FENTRY/FEXIT"); 24977 return -EINVAL; 24978 } 24979 conservative = aux->func_info_aux[subprog].unreliable; 24980 if (prog_extension) { 24981 if (conservative) { 24982 bpf_log(log, 24983 "Cannot replace static functions\n"); 24984 return -EINVAL; 24985 } 24986 if (!prog->jit_requested) { 24987 bpf_log(log, 24988 "Extension programs should be JITed\n"); 24989 return -EINVAL; 24990 } 24991 tgt_changes_pkt_data = aux->func 24992 ? 
aux->func[subprog]->aux->changes_pkt_data 24993 : aux->changes_pkt_data; 24994 if (prog->aux->changes_pkt_data && !tgt_changes_pkt_data) { 24995 bpf_log(log, 24996 "Extension program changes packet data, while original does not\n"); 24997 return -EINVAL; 24998 } 24999 25000 tgt_might_sleep = aux->func 25001 ? aux->func[subprog]->aux->might_sleep 25002 : aux->might_sleep; 25003 if (prog->aux->might_sleep && !tgt_might_sleep) { 25004 bpf_log(log, 25005 "Extension program may sleep, while original does not\n"); 25006 return -EINVAL; 25007 } 25008 } 25009 if (!tgt_prog->jited) { 25010 bpf_log(log, "Can attach to only JITed progs\n"); 25011 return -EINVAL; 25012 } 25013 if (prog_tracing) { 25014 if (aux->attach_tracing_prog) { 25015 /* 25016 * Target program is an fentry/fexit which is already attached 25017 * to another tracing program. More levels of nesting 25018 * attachment are not allowed. 25019 */ 25020 bpf_log(log, "Cannot nest tracing program attach more than once\n"); 25021 return -EINVAL; 25022 } 25023 } else if (tgt_prog->type == prog->type) { 25024 /* 25025 * To avoid potential call chain cycles, prevent attaching of a 25026 * program extension to another extension. It's ok to attach 25027 * fentry/fexit to extension program. 25028 */ 25029 bpf_log(log, "Cannot recursively attach\n"); 25030 return -EINVAL; 25031 } 25032 if (tgt_prog->type == BPF_PROG_TYPE_TRACING && 25033 prog_extension && 25034 (tgt_prog->expected_attach_type == BPF_TRACE_FENTRY || 25035 tgt_prog->expected_attach_type == BPF_TRACE_FEXIT || 25036 tgt_prog->expected_attach_type == BPF_TRACE_FSESSION)) { 25037 /* Program extensions can extend all program types 25038 * except fentry/fexit. The reason is the following. 25039 * The fentry/fexit programs are used for performance 25040 * analysis, stats and can be attached to any program 25041 * type. When extension program is replacing XDP function 25042 * it is necessary to allow performance analysis of all 25043 * functions. 
Both original XDP program and its program 25044 * extension. Hence attaching fentry/fexit to 25045 * BPF_PROG_TYPE_EXT is allowed. If extending of 25046 * fentry/fexit was allowed it would be possible to create 25047 * long call chain fentry->extension->fentry->extension 25048 * beyond reasonable stack size. Hence extending fentry 25049 * is not allowed. 25050 */ 25051 bpf_log(log, "Cannot extend fentry/fexit/fsession\n"); 25052 return -EINVAL; 25053 } 25054 } else { 25055 if (prog_extension) { 25056 bpf_log(log, "Cannot replace kernel functions\n"); 25057 return -EINVAL; 25058 } 25059 } 25060 25061 switch (prog->expected_attach_type) { 25062 case BPF_TRACE_RAW_TP: 25063 if (tgt_prog) { 25064 bpf_log(log, 25065 "Only FENTRY/FEXIT progs are attachable to another BPF prog\n"); 25066 return -EINVAL; 25067 } 25068 if (!btf_type_is_typedef(t)) { 25069 bpf_log(log, "attach_btf_id %u is not a typedef\n", 25070 btf_id); 25071 return -EINVAL; 25072 } 25073 if (strncmp(prefix, tname, sizeof(prefix) - 1)) { 25074 bpf_log(log, "attach_btf_id %u points to wrong type name %s\n", 25075 btf_id, tname); 25076 return -EINVAL; 25077 } 25078 tname += sizeof(prefix) - 1; 25079 25080 /* The func_proto of "btf_trace_##tname" is generated from typedef without argument 25081 * names. Thus using bpf_raw_event_map to get argument names. 
25082 */ 25083 btp = bpf_get_raw_tracepoint(tname); 25084 if (!btp) 25085 return -EINVAL; 25086 fname = kallsyms_lookup((unsigned long)btp->bpf_func, NULL, NULL, NULL, 25087 trace_symbol); 25088 bpf_put_raw_tracepoint(btp); 25089 25090 if (fname) 25091 ret = btf_find_by_name_kind(btf, fname, BTF_KIND_FUNC); 25092 25093 if (!fname || ret < 0) { 25094 bpf_log(log, "Cannot find btf of tracepoint template, fall back to %s%s.\n", 25095 prefix, tname); 25096 t = btf_type_by_id(btf, t->type); 25097 if (!btf_type_is_ptr(t)) 25098 /* should never happen in valid vmlinux build */ 25099 return -EINVAL; 25100 } else { 25101 t = btf_type_by_id(btf, ret); 25102 if (!btf_type_is_func(t)) 25103 /* should never happen in valid vmlinux build */ 25104 return -EINVAL; 25105 } 25106 25107 t = btf_type_by_id(btf, t->type); 25108 if (!btf_type_is_func_proto(t)) 25109 /* should never happen in valid vmlinux build */ 25110 return -EINVAL; 25111 25112 break; 25113 case BPF_TRACE_ITER: 25114 if (!btf_type_is_func(t)) { 25115 bpf_log(log, "attach_btf_id %u is not a function\n", 25116 btf_id); 25117 return -EINVAL; 25118 } 25119 t = btf_type_by_id(btf, t->type); 25120 if (!btf_type_is_func_proto(t)) 25121 return -EINVAL; 25122 ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel); 25123 if (ret) 25124 return ret; 25125 break; 25126 default: 25127 if (!prog_extension) 25128 return -EINVAL; 25129 fallthrough; 25130 case BPF_MODIFY_RETURN: 25131 case BPF_LSM_MAC: 25132 case BPF_LSM_CGROUP: 25133 case BPF_TRACE_FENTRY: 25134 case BPF_TRACE_FEXIT: 25135 case BPF_TRACE_FSESSION: 25136 if (prog->expected_attach_type == BPF_TRACE_FSESSION && 25137 !bpf_jit_supports_fsession()) { 25138 bpf_log(log, "JIT does not support fsession\n"); 25139 return -EOPNOTSUPP; 25140 } 25141 if (!btf_type_is_func(t)) { 25142 bpf_log(log, "attach_btf_id %u is not a function\n", 25143 btf_id); 25144 return -EINVAL; 25145 } 25146 if (prog_extension && 25147 btf_check_type_match(log, prog, btf, t)) 25148 return 
-EINVAL; 25149 t = btf_type_by_id(btf, t->type); 25150 if (!btf_type_is_func_proto(t)) 25151 return -EINVAL; 25152 25153 if ((prog->aux->saved_dst_prog_type || prog->aux->saved_dst_attach_type) && 25154 (!tgt_prog || prog->aux->saved_dst_prog_type != tgt_prog->type || 25155 prog->aux->saved_dst_attach_type != tgt_prog->expected_attach_type)) 25156 return -EINVAL; 25157 25158 if (tgt_prog && conservative) 25159 t = NULL; 25160 25161 ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel); 25162 if (ret < 0) 25163 return ret; 25164 25165 if (tgt_prog) { 25166 if (subprog == 0) 25167 addr = (long) tgt_prog->bpf_func; 25168 else 25169 addr = (long) tgt_prog->aux->func[subprog]->bpf_func; 25170 } else { 25171 if (btf_is_module(btf)) { 25172 mod = btf_try_get_module(btf); 25173 if (mod) 25174 addr = find_kallsyms_symbol_value(mod, tname); 25175 else 25176 addr = 0; 25177 } else { 25178 addr = kallsyms_lookup_name(tname); 25179 } 25180 if (!addr) { 25181 module_put(mod); 25182 bpf_log(log, 25183 "The address of function %s cannot be found\n", 25184 tname); 25185 return -ENOENT; 25186 } 25187 } 25188 25189 if (prog->sleepable) { 25190 ret = -EINVAL; 25191 switch (prog->type) { 25192 case BPF_PROG_TYPE_TRACING: 25193 25194 /* fentry/fexit/fmod_ret progs can be sleepable if they are 25195 * attached to ALLOW_ERROR_INJECTION and are not in denylist. 25196 */ 25197 if (!check_non_sleepable_error_inject(btf_id) && 25198 within_error_injection_list(addr)) 25199 ret = 0; 25200 /* fentry/fexit/fmod_ret progs can also be sleepable if they are 25201 * in the fmodret id set with the KF_SLEEPABLE flag. 25202 */ 25203 else { 25204 u32 *flags = btf_kfunc_is_modify_return(btf, btf_id, 25205 prog); 25206 25207 if (flags && (*flags & KF_SLEEPABLE)) 25208 ret = 0; 25209 } 25210 break; 25211 case BPF_PROG_TYPE_LSM: 25212 /* LSM progs check that they are attached to bpf_lsm_*() funcs. 25213 * Only some of them are sleepable. 
25214 */ 25215 if (bpf_lsm_is_sleepable_hook(btf_id)) 25216 ret = 0; 25217 break; 25218 default: 25219 break; 25220 } 25221 if (ret) { 25222 module_put(mod); 25223 bpf_log(log, "%s is not sleepable\n", tname); 25224 return ret; 25225 } 25226 } else if (prog->expected_attach_type == BPF_MODIFY_RETURN) { 25227 if (tgt_prog) { 25228 module_put(mod); 25229 bpf_log(log, "can't modify return codes of BPF programs\n"); 25230 return -EINVAL; 25231 } 25232 ret = -EINVAL; 25233 if (btf_kfunc_is_modify_return(btf, btf_id, prog) || 25234 !check_attach_modify_return(addr, tname)) 25235 ret = 0; 25236 if (ret) { 25237 module_put(mod); 25238 bpf_log(log, "%s() is not modifiable\n", tname); 25239 return ret; 25240 } 25241 } 25242 25243 break; 25244 } 25245 tgt_info->tgt_addr = addr; 25246 tgt_info->tgt_name = tname; 25247 tgt_info->tgt_type = t; 25248 tgt_info->tgt_mod = mod; 25249 return 0; 25250 } 25251 25252 BTF_SET_START(btf_id_deny) 25253 BTF_ID_UNUSED 25254 #ifdef CONFIG_SMP 25255 BTF_ID(func, ___migrate_enable) 25256 BTF_ID(func, migrate_disable) 25257 BTF_ID(func, migrate_enable) 25258 #endif 25259 #if !defined CONFIG_PREEMPT_RCU && !defined CONFIG_TINY_RCU 25260 BTF_ID(func, rcu_read_unlock_strict) 25261 #endif 25262 #if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_TRACE_PREEMPT_TOGGLE) 25263 BTF_ID(func, preempt_count_add) 25264 BTF_ID(func, preempt_count_sub) 25265 #endif 25266 #ifdef CONFIG_PREEMPT_RCU 25267 BTF_ID(func, __rcu_read_lock) 25268 BTF_ID(func, __rcu_read_unlock) 25269 #endif 25270 BTF_SET_END(btf_id_deny) 25271 25272 /* fexit and fmod_ret can't be used to attach to __noreturn functions. 25273 * Currently, we must manually list all __noreturn functions here. Once a more 25274 * robust solution is implemented, this workaround can be removed. 
25275 */ 25276 BTF_SET_START(noreturn_deny) 25277 #ifdef CONFIG_IA32_EMULATION 25278 BTF_ID(func, __ia32_sys_exit) 25279 BTF_ID(func, __ia32_sys_exit_group) 25280 #endif 25281 #ifdef CONFIG_KUNIT 25282 BTF_ID(func, __kunit_abort) 25283 BTF_ID(func, kunit_try_catch_throw) 25284 #endif 25285 #ifdef CONFIG_MODULES 25286 BTF_ID(func, __module_put_and_kthread_exit) 25287 #endif 25288 #ifdef CONFIG_X86_64 25289 BTF_ID(func, __x64_sys_exit) 25290 BTF_ID(func, __x64_sys_exit_group) 25291 #endif 25292 BTF_ID(func, do_exit) 25293 BTF_ID(func, do_group_exit) 25294 BTF_ID(func, kthread_complete_and_exit) 25295 BTF_ID(func, make_task_dead) 25296 BTF_SET_END(noreturn_deny) 25297 25298 static bool can_be_sleepable(struct bpf_prog *prog) 25299 { 25300 if (prog->type == BPF_PROG_TYPE_TRACING) { 25301 switch (prog->expected_attach_type) { 25302 case BPF_TRACE_FENTRY: 25303 case BPF_TRACE_FEXIT: 25304 case BPF_MODIFY_RETURN: 25305 case BPF_TRACE_ITER: 25306 case BPF_TRACE_FSESSION: 25307 return true; 25308 default: 25309 return false; 25310 } 25311 } 25312 return prog->type == BPF_PROG_TYPE_LSM || 25313 prog->type == BPF_PROG_TYPE_KPROBE /* only for uprobes */ || 25314 prog->type == BPF_PROG_TYPE_STRUCT_OPS; 25315 } 25316 25317 static int check_attach_btf_id(struct bpf_verifier_env *env) 25318 { 25319 struct bpf_prog *prog = env->prog; 25320 struct bpf_prog *tgt_prog = prog->aux->dst_prog; 25321 struct bpf_attach_target_info tgt_info = {}; 25322 u32 btf_id = prog->aux->attach_btf_id; 25323 struct bpf_trampoline *tr; 25324 int ret; 25325 u64 key; 25326 25327 if (prog->type == BPF_PROG_TYPE_SYSCALL) { 25328 if (prog->sleepable) 25329 /* attach_btf_id checked to be zero already */ 25330 return 0; 25331 verbose(env, "Syscall programs can only be sleepable\n"); 25332 return -EINVAL; 25333 } 25334 25335 if (prog->sleepable && !can_be_sleepable(prog)) { 25336 verbose(env, "Only fentry/fexit/fmod_ret, lsm, iter, uprobe, and struct_ops programs can be sleepable\n"); 25337 return -EINVAL; 
25338 } 25339 25340 if (prog->type == BPF_PROG_TYPE_STRUCT_OPS) 25341 return check_struct_ops_btf_id(env); 25342 25343 if (prog->type != BPF_PROG_TYPE_TRACING && 25344 prog->type != BPF_PROG_TYPE_LSM && 25345 prog->type != BPF_PROG_TYPE_EXT) 25346 return 0; 25347 25348 ret = bpf_check_attach_target(&env->log, prog, tgt_prog, btf_id, &tgt_info); 25349 if (ret) 25350 return ret; 25351 25352 if (tgt_prog && prog->type == BPF_PROG_TYPE_EXT) { 25353 /* to make freplace equivalent to their targets, they need to 25354 * inherit env->ops and expected_attach_type for the rest of the 25355 * verification 25356 */ 25357 env->ops = bpf_verifier_ops[tgt_prog->type]; 25358 prog->expected_attach_type = tgt_prog->expected_attach_type; 25359 } 25360 25361 /* store info about the attachment target that will be used later */ 25362 prog->aux->attach_func_proto = tgt_info.tgt_type; 25363 prog->aux->attach_func_name = tgt_info.tgt_name; 25364 prog->aux->mod = tgt_info.tgt_mod; 25365 25366 if (tgt_prog) { 25367 prog->aux->saved_dst_prog_type = tgt_prog->type; 25368 prog->aux->saved_dst_attach_type = tgt_prog->expected_attach_type; 25369 } 25370 25371 if (prog->expected_attach_type == BPF_TRACE_RAW_TP) { 25372 prog->aux->attach_btf_trace = true; 25373 return 0; 25374 } else if (prog->expected_attach_type == BPF_TRACE_ITER) { 25375 return bpf_iter_prog_supported(prog); 25376 } 25377 25378 if (prog->type == BPF_PROG_TYPE_LSM) { 25379 ret = bpf_lsm_verify_prog(&env->log, prog); 25380 if (ret < 0) 25381 return ret; 25382 } else if (prog->type == BPF_PROG_TYPE_TRACING && 25383 btf_id_set_contains(&btf_id_deny, btf_id)) { 25384 verbose(env, "Attaching tracing programs to function '%s' is rejected.\n", 25385 tgt_info.tgt_name); 25386 return -EINVAL; 25387 } else if ((prog->expected_attach_type == BPF_TRACE_FEXIT || 25388 prog->expected_attach_type == BPF_TRACE_FSESSION || 25389 prog->expected_attach_type == BPF_MODIFY_RETURN) && 25390 btf_id_set_contains(&noreturn_deny, btf_id)) { 25391 
verbose(env, "Attaching fexit/fsession/fmod_ret to __noreturn function '%s' is rejected.\n", 25392 tgt_info.tgt_name); 25393 return -EINVAL; 25394 } 25395 25396 key = bpf_trampoline_compute_key(tgt_prog, prog->aux->attach_btf, btf_id); 25397 tr = bpf_trampoline_get(key, &tgt_info); 25398 if (!tr) 25399 return -ENOMEM; 25400 25401 if (tgt_prog && tgt_prog->aux->tail_call_reachable) 25402 tr->flags = BPF_TRAMP_F_TAIL_CALL_CTX; 25403 25404 prog->aux->dst_trampoline = tr; 25405 return 0; 25406 } 25407 25408 struct btf *bpf_get_btf_vmlinux(void) 25409 { 25410 if (!btf_vmlinux && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) { 25411 mutex_lock(&bpf_verifier_lock); 25412 if (!btf_vmlinux) 25413 btf_vmlinux = btf_parse_vmlinux(); 25414 mutex_unlock(&bpf_verifier_lock); 25415 } 25416 return btf_vmlinux; 25417 } 25418 25419 /* 25420 * The add_fd_from_fd_array() is executed only if fd_array_cnt is non-zero. In 25421 * this case expect that every file descriptor in the array is either a map or 25422 * a BTF. Everything else is considered to be trash. 
25423 */ 25424 static int add_fd_from_fd_array(struct bpf_verifier_env *env, int fd) 25425 { 25426 struct bpf_map *map; 25427 struct btf *btf; 25428 CLASS(fd, f)(fd); 25429 int err; 25430 25431 map = __bpf_map_get(f); 25432 if (!IS_ERR(map)) { 25433 err = __add_used_map(env, map); 25434 if (err < 0) 25435 return err; 25436 return 0; 25437 } 25438 25439 btf = __btf_get_by_fd(f); 25440 if (!IS_ERR(btf)) { 25441 btf_get(btf); 25442 return __add_used_btf(env, btf); 25443 } 25444 25445 verbose(env, "fd %d is not pointing to valid bpf_map or btf\n", fd); 25446 return PTR_ERR(map); 25447 } 25448 25449 static int process_fd_array(struct bpf_verifier_env *env, union bpf_attr *attr, bpfptr_t uattr) 25450 { 25451 size_t size = sizeof(int); 25452 int ret; 25453 int fd; 25454 u32 i; 25455 25456 env->fd_array = make_bpfptr(attr->fd_array, uattr.is_kernel); 25457 25458 /* 25459 * The only difference between old (no fd_array_cnt is given) and new 25460 * APIs is that in the latter case the fd_array is expected to be 25461 * continuous and is scanned for map fds right away 25462 */ 25463 if (!attr->fd_array_cnt) 25464 return 0; 25465 25466 /* Check for integer overflow */ 25467 if (attr->fd_array_cnt >= (U32_MAX / size)) { 25468 verbose(env, "fd_array_cnt is too big (%u)\n", attr->fd_array_cnt); 25469 return -EINVAL; 25470 } 25471 25472 for (i = 0; i < attr->fd_array_cnt; i++) { 25473 if (copy_from_bpfptr_offset(&fd, env->fd_array, i * size, size)) 25474 return -EFAULT; 25475 25476 ret = add_fd_from_fd_array(env, fd); 25477 if (ret) 25478 return ret; 25479 } 25480 25481 return 0; 25482 } 25483 25484 /* Each field is a register bitmask */ 25485 struct insn_live_regs { 25486 u16 use; /* registers read by instruction */ 25487 u16 def; /* registers written by instruction */ 25488 u16 in; /* registers that may be alive before instruction */ 25489 u16 out; /* registers that may be alive after instruction */ 25490 }; 25491 25492 /* Bitmask with 1s for all caller saved registers */ 25493 
#define ALL_CALLER_SAVED_REGS ((1u << CALLER_SAVED_REGS) - 1) 25494 25495 /* Compute info->{use,def} fields for the instruction */ 25496 static void compute_insn_live_regs(struct bpf_verifier_env *env, 25497 struct bpf_insn *insn, 25498 struct insn_live_regs *info) 25499 { 25500 struct call_summary cs; 25501 u8 class = BPF_CLASS(insn->code); 25502 u8 code = BPF_OP(insn->code); 25503 u8 mode = BPF_MODE(insn->code); 25504 u16 src = BIT(insn->src_reg); 25505 u16 dst = BIT(insn->dst_reg); 25506 u16 r0 = BIT(0); 25507 u16 def = 0; 25508 u16 use = 0xffff; 25509 25510 switch (class) { 25511 case BPF_LD: 25512 switch (mode) { 25513 case BPF_IMM: 25514 if (BPF_SIZE(insn->code) == BPF_DW) { 25515 def = dst; 25516 use = 0; 25517 } 25518 break; 25519 case BPF_LD | BPF_ABS: 25520 case BPF_LD | BPF_IND: 25521 /* stick with defaults */ 25522 break; 25523 } 25524 break; 25525 case BPF_LDX: 25526 switch (mode) { 25527 case BPF_MEM: 25528 case BPF_MEMSX: 25529 def = dst; 25530 use = src; 25531 break; 25532 } 25533 break; 25534 case BPF_ST: 25535 switch (mode) { 25536 case BPF_MEM: 25537 def = 0; 25538 use = dst; 25539 break; 25540 } 25541 break; 25542 case BPF_STX: 25543 switch (mode) { 25544 case BPF_MEM: 25545 def = 0; 25546 use = dst | src; 25547 break; 25548 case BPF_ATOMIC: 25549 switch (insn->imm) { 25550 case BPF_CMPXCHG: 25551 use = r0 | dst | src; 25552 def = r0; 25553 break; 25554 case BPF_LOAD_ACQ: 25555 def = dst; 25556 use = src; 25557 break; 25558 case BPF_STORE_REL: 25559 def = 0; 25560 use = dst | src; 25561 break; 25562 default: 25563 use = dst | src; 25564 if (insn->imm & BPF_FETCH) 25565 def = src; 25566 else 25567 def = 0; 25568 } 25569 break; 25570 } 25571 break; 25572 case BPF_ALU: 25573 case BPF_ALU64: 25574 switch (code) { 25575 case BPF_END: 25576 use = dst; 25577 def = dst; 25578 break; 25579 case BPF_MOV: 25580 def = dst; 25581 if (BPF_SRC(insn->code) == BPF_K) 25582 use = 0; 25583 else 25584 use = src; 25585 break; 25586 default: 25587 def = dst; 25588 if 
(BPF_SRC(insn->code) == BPF_K) 25589 use = dst; 25590 else 25591 use = dst | src; 25592 } 25593 break; 25594 case BPF_JMP: 25595 case BPF_JMP32: 25596 switch (code) { 25597 case BPF_JA: 25598 def = 0; 25599 if (BPF_SRC(insn->code) == BPF_X) 25600 use = dst; 25601 else 25602 use = 0; 25603 break; 25604 case BPF_JCOND: 25605 def = 0; 25606 use = 0; 25607 break; 25608 case BPF_EXIT: 25609 def = 0; 25610 use = r0; 25611 break; 25612 case BPF_CALL: 25613 def = ALL_CALLER_SAVED_REGS; 25614 use = def & ~BIT(BPF_REG_0); 25615 if (get_call_summary(env, insn, &cs)) 25616 use = GENMASK(cs.num_params, 1); 25617 break; 25618 default: 25619 def = 0; 25620 if (BPF_SRC(insn->code) == BPF_K) 25621 use = dst; 25622 else 25623 use = dst | src; 25624 } 25625 break; 25626 } 25627 25628 info->def = def; 25629 info->use = use; 25630 } 25631 25632 /* Compute may-live registers after each instruction in the program. 25633 * The register is live after the instruction I if it is read by some 25634 * instruction S following I during program execution and is not 25635 * overwritten between I and S. 25636 * 25637 * Store result in env->insn_aux_data[i].live_regs. 
25638 */ 25639 static int compute_live_registers(struct bpf_verifier_env *env) 25640 { 25641 struct bpf_insn_aux_data *insn_aux = env->insn_aux_data; 25642 struct bpf_insn *insns = env->prog->insnsi; 25643 struct insn_live_regs *state; 25644 int insn_cnt = env->prog->len; 25645 int err = 0, i, j; 25646 bool changed; 25647 25648 /* Use the following algorithm: 25649 * - define the following: 25650 * - I.use : a set of all registers read by instruction I; 25651 * - I.def : a set of all registers written by instruction I; 25652 * - I.in : a set of all registers that may be alive before I execution; 25653 * - I.out : a set of all registers that may be alive after I execution; 25654 * - insn_successors(I): a set of instructions S that might immediately 25655 * follow I for some program execution; 25656 * - associate separate empty sets 'I.in' and 'I.out' with each instruction; 25657 * - visit each instruction in a postorder and update 25658 * state[i].in, state[i].out as follows: 25659 * 25660 * state[i].out = U [state[s].in for S in insn_successors(i)] 25661 * state[i].in = (state[i].out / state[i].def) U state[i].use 25662 * 25663 * (where U stands for set union, / stands for set difference) 25664 * - repeat the computation while {in,out} fields changes for 25665 * any instruction. 
25666 */ 25667 state = kvzalloc_objs(*state, insn_cnt, GFP_KERNEL_ACCOUNT); 25668 if (!state) { 25669 err = -ENOMEM; 25670 goto out; 25671 } 25672 25673 for (i = 0; i < insn_cnt; ++i) 25674 compute_insn_live_regs(env, &insns[i], &state[i]); 25675 25676 changed = true; 25677 while (changed) { 25678 changed = false; 25679 for (i = 0; i < env->cfg.cur_postorder; ++i) { 25680 int insn_idx = env->cfg.insn_postorder[i]; 25681 struct insn_live_regs *live = &state[insn_idx]; 25682 struct bpf_iarray *succ; 25683 u16 new_out = 0; 25684 u16 new_in = 0; 25685 25686 succ = bpf_insn_successors(env, insn_idx); 25687 for (int s = 0; s < succ->cnt; ++s) 25688 new_out |= state[succ->items[s]].in; 25689 new_in = (new_out & ~live->def) | live->use; 25690 if (new_out != live->out || new_in != live->in) { 25691 live->in = new_in; 25692 live->out = new_out; 25693 changed = true; 25694 } 25695 } 25696 } 25697 25698 for (i = 0; i < insn_cnt; ++i) 25699 insn_aux[i].live_regs_before = state[i].in; 25700 25701 if (env->log.level & BPF_LOG_LEVEL2) { 25702 verbose(env, "Live regs before insn:\n"); 25703 for (i = 0; i < insn_cnt; ++i) { 25704 if (env->insn_aux_data[i].scc) 25705 verbose(env, "%3d ", env->insn_aux_data[i].scc); 25706 else 25707 verbose(env, " "); 25708 verbose(env, "%3d: ", i); 25709 for (j = BPF_REG_0; j < BPF_REG_10; ++j) 25710 if (insn_aux[i].live_regs_before & BIT(j)) 25711 verbose(env, "%d", j); 25712 else 25713 verbose(env, "."); 25714 verbose(env, " "); 25715 verbose_insn(env, &insns[i]); 25716 if (bpf_is_ldimm64(&insns[i])) 25717 i++; 25718 } 25719 } 25720 25721 out: 25722 kvfree(state); 25723 return err; 25724 } 25725 25726 /* 25727 * Compute strongly connected components (SCCs) on the CFG. 25728 * Assign an SCC number to each instruction, recorded in env->insn_aux[*].scc. 25729 * If instruction is a sole member of its SCC and there are no self edges, 25730 * assign it SCC number of zero. 25731 * Uses a non-recursive adaptation of Tarjan's algorithm for SCC computation. 
25732 */ 25733 static int compute_scc(struct bpf_verifier_env *env) 25734 { 25735 const u32 NOT_ON_STACK = U32_MAX; 25736 25737 struct bpf_insn_aux_data *aux = env->insn_aux_data; 25738 const u32 insn_cnt = env->prog->len; 25739 int stack_sz, dfs_sz, err = 0; 25740 u32 *stack, *pre, *low, *dfs; 25741 u32 i, j, t, w; 25742 u32 next_preorder_num; 25743 u32 next_scc_id; 25744 bool assign_scc; 25745 struct bpf_iarray *succ; 25746 25747 next_preorder_num = 1; 25748 next_scc_id = 1; 25749 /* 25750 * - 'stack' accumulates vertices in DFS order, see invariant comment below; 25751 * - 'pre[t] == p' => preorder number of vertex 't' is 'p'; 25752 * - 'low[t] == n' => smallest preorder number of the vertex reachable from 't' is 'n'; 25753 * - 'dfs' DFS traversal stack, used to emulate explicit recursion. 25754 */ 25755 stack = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL_ACCOUNT); 25756 pre = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL_ACCOUNT); 25757 low = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL_ACCOUNT); 25758 dfs = kvcalloc(insn_cnt, sizeof(*dfs), GFP_KERNEL_ACCOUNT); 25759 if (!stack || !pre || !low || !dfs) { 25760 err = -ENOMEM; 25761 goto exit; 25762 } 25763 /* 25764 * References: 25765 * [1] R. Tarjan "Depth-First Search and Linear Graph Algorithms" 25766 * [2] D. J. Pearce "A Space-Efficient Algorithm for Finding Strongly Connected Components" 25767 * 25768 * The algorithm maintains the following invariant: 25769 * - suppose there is a path 'u' ~> 'v', such that 'pre[v] < pre[u]'; 25770 * - then, vertex 'u' remains on stack while vertex 'v' is on stack. 25771 * 25772 * Consequently: 25773 * - If 'low[v] < pre[v]', there is a path from 'v' to some vertex 'u', 25774 * such that 'pre[u] == low[v]'; vertex 'u' is currently on the stack, 25775 * and thus there is an SCC (loop) containing both 'u' and 'v'. 25776 * - If 'low[v] == pre[v]', loops containing 'v' have been explored, 25777 * and 'v' can be considered the root of some SCC. 
25778 * 25779 * Here is a pseudo-code for an explicitly recursive version of the algorithm: 25780 * 25781 * NOT_ON_STACK = insn_cnt + 1 25782 * pre = [0] * insn_cnt 25783 * low = [0] * insn_cnt 25784 * scc = [0] * insn_cnt 25785 * stack = [] 25786 * 25787 * next_preorder_num = 1 25788 * next_scc_id = 1 25789 * 25790 * def recur(w): 25791 * nonlocal next_preorder_num 25792 * nonlocal next_scc_id 25793 * 25794 * pre[w] = next_preorder_num 25795 * low[w] = next_preorder_num 25796 * next_preorder_num += 1 25797 * stack.append(w) 25798 * for s in successors(w): 25799 * # Note: for classic algorithm the block below should look as: 25800 * # 25801 * # if pre[s] == 0: 25802 * # recur(s) 25803 * # low[w] = min(low[w], low[s]) 25804 * # elif low[s] != NOT_ON_STACK: 25805 * # low[w] = min(low[w], pre[s]) 25806 * # 25807 * # But replacing both 'min' instructions with 'low[w] = min(low[w], low[s])' 25808 * # does not break the invariant and makes itartive version of the algorithm 25809 * # simpler. See 'Algorithm #3' from [2]. 25810 * 25811 * # 's' not yet visited 25812 * if pre[s] == 0: 25813 * recur(s) 25814 * # if 's' is on stack, pick lowest reachable preorder number from it; 25815 * # if 's' is not on stack 'low[s] == NOT_ON_STACK > low[w]', 25816 * # so 'min' would be a noop. 25817 * low[w] = min(low[w], low[s]) 25818 * 25819 * if low[w] == pre[w]: 25820 * # 'w' is the root of an SCC, pop all vertices 25821 * # below 'w' on stack and assign same SCC to them. 25822 * while True: 25823 * t = stack.pop() 25824 * low[t] = NOT_ON_STACK 25825 * scc[t] = next_scc_id 25826 * if t == w: 25827 * break 25828 * next_scc_id += 1 25829 * 25830 * for i in range(0, insn_cnt): 25831 * if pre[i] == 0: 25832 * recur(i) 25833 * 25834 * Below implementation replaces explicit recursion with array 'dfs'. 
25835 */ 25836 for (i = 0; i < insn_cnt; i++) { 25837 if (pre[i]) 25838 continue; 25839 stack_sz = 0; 25840 dfs_sz = 1; 25841 dfs[0] = i; 25842 dfs_continue: 25843 while (dfs_sz) { 25844 w = dfs[dfs_sz - 1]; 25845 if (pre[w] == 0) { 25846 low[w] = next_preorder_num; 25847 pre[w] = next_preorder_num; 25848 next_preorder_num++; 25849 stack[stack_sz++] = w; 25850 } 25851 /* Visit 'w' successors */ 25852 succ = bpf_insn_successors(env, w); 25853 for (j = 0; j < succ->cnt; ++j) { 25854 if (pre[succ->items[j]]) { 25855 low[w] = min(low[w], low[succ->items[j]]); 25856 } else { 25857 dfs[dfs_sz++] = succ->items[j]; 25858 goto dfs_continue; 25859 } 25860 } 25861 /* 25862 * Preserve the invariant: if some vertex above in the stack 25863 * is reachable from 'w', keep 'w' on the stack. 25864 */ 25865 if (low[w] < pre[w]) { 25866 dfs_sz--; 25867 goto dfs_continue; 25868 } 25869 /* 25870 * Assign SCC number only if component has two or more elements, 25871 * or if component has a self reference, or if instruction is a 25872 * callback calling function (implicit loop). 25873 */ 25874 assign_scc = stack[stack_sz - 1] != w; /* two or more elements? */ 25875 for (j = 0; j < succ->cnt; ++j) { /* self reference? */ 25876 if (succ->items[j] == w) { 25877 assign_scc = true; 25878 break; 25879 } 25880 } 25881 if (bpf_calls_callback(env, w)) /* implicit loop? 
*/ 25882 assign_scc = true; 25883 /* Pop component elements from stack */ 25884 do { 25885 t = stack[--stack_sz]; 25886 low[t] = NOT_ON_STACK; 25887 if (assign_scc) 25888 aux[t].scc = next_scc_id; 25889 } while (t != w); 25890 if (assign_scc) 25891 next_scc_id++; 25892 dfs_sz--; 25893 } 25894 } 25895 env->scc_info = kvzalloc_objs(*env->scc_info, next_scc_id, 25896 GFP_KERNEL_ACCOUNT); 25897 if (!env->scc_info) { 25898 err = -ENOMEM; 25899 goto exit; 25900 } 25901 env->scc_cnt = next_scc_id; 25902 exit: 25903 kvfree(stack); 25904 kvfree(pre); 25905 kvfree(low); 25906 kvfree(dfs); 25907 return err; 25908 } 25909 25910 int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_size) 25911 { 25912 u64 start_time = ktime_get_ns(); 25913 struct bpf_verifier_env *env; 25914 int i, len, ret = -EINVAL, err; 25915 u32 log_true_size; 25916 bool is_priv; 25917 25918 BTF_TYPE_EMIT(enum bpf_features); 25919 25920 /* no program is valid */ 25921 if (ARRAY_SIZE(bpf_verifier_ops) == 0) 25922 return -EINVAL; 25923 25924 /* 'struct bpf_verifier_env' can be global, but since it's not small, 25925 * allocate/free it every time bpf_check() is called 25926 */ 25927 env = kvzalloc_obj(struct bpf_verifier_env, GFP_KERNEL_ACCOUNT); 25928 if (!env) 25929 return -ENOMEM; 25930 25931 env->bt.env = env; 25932 25933 len = (*prog)->len; 25934 env->insn_aux_data = 25935 vzalloc(array_size(sizeof(struct bpf_insn_aux_data), len)); 25936 ret = -ENOMEM; 25937 if (!env->insn_aux_data) 25938 goto err_free_env; 25939 for (i = 0; i < len; i++) 25940 env->insn_aux_data[i].orig_idx = i; 25941 env->succ = iarray_realloc(NULL, 2); 25942 if (!env->succ) 25943 goto err_free_env; 25944 env->prog = *prog; 25945 env->ops = bpf_verifier_ops[env->prog->type]; 25946 25947 env->allow_ptr_leaks = bpf_allow_ptr_leaks(env->prog->aux->token); 25948 env->allow_uninit_stack = bpf_allow_uninit_stack(env->prog->aux->token); 25949 env->bypass_spec_v1 = bpf_bypass_spec_v1(env->prog->aux->token); 
25950 env->bypass_spec_v4 = bpf_bypass_spec_v4(env->prog->aux->token); 25951 env->bpf_capable = is_priv = bpf_token_capable(env->prog->aux->token, CAP_BPF); 25952 25953 bpf_get_btf_vmlinux(); 25954 25955 /* grab the mutex to protect few globals used by verifier */ 25956 if (!is_priv) 25957 mutex_lock(&bpf_verifier_lock); 25958 25959 /* user could have requested verbose verifier output 25960 * and supplied buffer to store the verification trace 25961 */ 25962 ret = bpf_vlog_init(&env->log, attr->log_level, 25963 (char __user *) (unsigned long) attr->log_buf, 25964 attr->log_size); 25965 if (ret) 25966 goto err_unlock; 25967 25968 ret = process_fd_array(env, attr, uattr); 25969 if (ret) 25970 goto skip_full_check; 25971 25972 mark_verifier_state_clean(env); 25973 25974 if (IS_ERR(btf_vmlinux)) { 25975 /* Either gcc or pahole or kernel are broken. */ 25976 verbose(env, "in-kernel BTF is malformed\n"); 25977 ret = PTR_ERR(btf_vmlinux); 25978 goto skip_full_check; 25979 } 25980 25981 env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT); 25982 if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) 25983 env->strict_alignment = true; 25984 if (attr->prog_flags & BPF_F_ANY_ALIGNMENT) 25985 env->strict_alignment = false; 25986 25987 if (is_priv) 25988 env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ; 25989 env->test_reg_invariants = attr->prog_flags & BPF_F_TEST_REG_INVARIANTS; 25990 25991 env->explored_states = kvzalloc_objs(struct list_head, 25992 state_htab_size(env), 25993 GFP_KERNEL_ACCOUNT); 25994 ret = -ENOMEM; 25995 if (!env->explored_states) 25996 goto skip_full_check; 25997 25998 for (i = 0; i < state_htab_size(env); i++) 25999 INIT_LIST_HEAD(&env->explored_states[i]); 26000 INIT_LIST_HEAD(&env->free_list); 26001 26002 ret = check_btf_info_early(env, attr, uattr); 26003 if (ret < 0) 26004 goto skip_full_check; 26005 26006 ret = add_subprog_and_kfunc(env); 26007 if (ret < 0) 26008 goto skip_full_check; 26009 26010 ret = 
check_subprogs(env); 26011 if (ret < 0) 26012 goto skip_full_check; 26013 26014 ret = check_btf_info(env, attr, uattr); 26015 if (ret < 0) 26016 goto skip_full_check; 26017 26018 ret = resolve_pseudo_ldimm64(env); 26019 if (ret < 0) 26020 goto skip_full_check; 26021 26022 if (bpf_prog_is_offloaded(env->prog->aux)) { 26023 ret = bpf_prog_offload_verifier_prep(env->prog); 26024 if (ret) 26025 goto skip_full_check; 26026 } 26027 26028 ret = check_cfg(env); 26029 if (ret < 0) 26030 goto skip_full_check; 26031 26032 ret = compute_postorder(env); 26033 if (ret < 0) 26034 goto skip_full_check; 26035 26036 ret = bpf_stack_liveness_init(env); 26037 if (ret) 26038 goto skip_full_check; 26039 26040 ret = check_attach_btf_id(env); 26041 if (ret) 26042 goto skip_full_check; 26043 26044 ret = compute_scc(env); 26045 if (ret < 0) 26046 goto skip_full_check; 26047 26048 ret = compute_live_registers(env); 26049 if (ret < 0) 26050 goto skip_full_check; 26051 26052 ret = mark_fastcall_patterns(env); 26053 if (ret < 0) 26054 goto skip_full_check; 26055 26056 ret = do_check_main(env); 26057 ret = ret ?: do_check_subprogs(env); 26058 26059 if (ret == 0 && bpf_prog_is_offloaded(env->prog->aux)) 26060 ret = bpf_prog_offload_finalize(env); 26061 26062 skip_full_check: 26063 kvfree(env->explored_states); 26064 26065 /* might decrease stack depth, keep it before passes that 26066 * allocate additional slots. 
26067 */ 26068 if (ret == 0) 26069 ret = remove_fastcall_spills_fills(env); 26070 26071 if (ret == 0) 26072 ret = check_max_stack_depth(env); 26073 26074 /* instruction rewrites happen after this point */ 26075 if (ret == 0) 26076 ret = optimize_bpf_loop(env); 26077 26078 if (is_priv) { 26079 if (ret == 0) 26080 opt_hard_wire_dead_code_branches(env); 26081 if (ret == 0) 26082 ret = opt_remove_dead_code(env); 26083 if (ret == 0) 26084 ret = opt_remove_nops(env); 26085 } else { 26086 if (ret == 0) 26087 sanitize_dead_code(env); 26088 } 26089 26090 if (ret == 0) 26091 /* program is valid, convert *(u32*)(ctx + off) accesses */ 26092 ret = convert_ctx_accesses(env); 26093 26094 if (ret == 0) 26095 ret = do_misc_fixups(env); 26096 26097 /* do 32-bit optimization after insn patching has done so those patched 26098 * insns could be handled correctly. 26099 */ 26100 if (ret == 0 && !bpf_prog_is_offloaded(env->prog->aux)) { 26101 ret = opt_subreg_zext_lo32_rnd_hi32(env, attr); 26102 env->prog->aux->verifier_zext = bpf_jit_needs_zext() ? 
!ret 26103 : false; 26104 } 26105 26106 if (ret == 0) 26107 ret = fixup_call_args(env); 26108 26109 env->verification_time = ktime_get_ns() - start_time; 26110 print_verification_stats(env); 26111 env->prog->aux->verified_insns = env->insn_processed; 26112 26113 /* preserve original error even if log finalization is successful */ 26114 err = bpf_vlog_finalize(&env->log, &log_true_size); 26115 if (err) 26116 ret = err; 26117 26118 if (uattr_size >= offsetofend(union bpf_attr, log_true_size) && 26119 copy_to_bpfptr_offset(uattr, offsetof(union bpf_attr, log_true_size), 26120 &log_true_size, sizeof(log_true_size))) { 26121 ret = -EFAULT; 26122 goto err_release_maps; 26123 } 26124 26125 if (ret) 26126 goto err_release_maps; 26127 26128 if (env->used_map_cnt) { 26129 /* if program passed verifier, update used_maps in bpf_prog_info */ 26130 env->prog->aux->used_maps = kmalloc_objs(env->used_maps[0], 26131 env->used_map_cnt, 26132 GFP_KERNEL_ACCOUNT); 26133 26134 if (!env->prog->aux->used_maps) { 26135 ret = -ENOMEM; 26136 goto err_release_maps; 26137 } 26138 26139 memcpy(env->prog->aux->used_maps, env->used_maps, 26140 sizeof(env->used_maps[0]) * env->used_map_cnt); 26141 env->prog->aux->used_map_cnt = env->used_map_cnt; 26142 } 26143 if (env->used_btf_cnt) { 26144 /* if program passed verifier, update used_btfs in bpf_prog_aux */ 26145 env->prog->aux->used_btfs = kmalloc_objs(env->used_btfs[0], 26146 env->used_btf_cnt, 26147 GFP_KERNEL_ACCOUNT); 26148 if (!env->prog->aux->used_btfs) { 26149 ret = -ENOMEM; 26150 goto err_release_maps; 26151 } 26152 26153 memcpy(env->prog->aux->used_btfs, env->used_btfs, 26154 sizeof(env->used_btfs[0]) * env->used_btf_cnt); 26155 env->prog->aux->used_btf_cnt = env->used_btf_cnt; 26156 } 26157 if (env->used_map_cnt || env->used_btf_cnt) { 26158 /* program is valid. 
Convert pseudo bpf_ld_imm64 into generic 26159 * bpf_ld_imm64 instructions 26160 */ 26161 convert_pseudo_ld_imm64(env); 26162 } 26163 26164 adjust_btf_func(env); 26165 26166 err_release_maps: 26167 if (ret) 26168 release_insn_arrays(env); 26169 if (!env->prog->aux->used_maps) 26170 /* if we didn't copy map pointers into bpf_prog_info, release 26171 * them now. Otherwise free_used_maps() will release them. 26172 */ 26173 release_maps(env); 26174 if (!env->prog->aux->used_btfs) 26175 release_btfs(env); 26176 26177 /* extension progs temporarily inherit the attach_type of their targets 26178 for verification purposes, so set it back to zero before returning 26179 */ 26180 if (env->prog->type == BPF_PROG_TYPE_EXT) 26181 env->prog->expected_attach_type = 0; 26182 26183 *prog = env->prog; 26184 26185 module_put(env->attach_btf_mod); 26186 err_unlock: 26187 if (!is_priv) 26188 mutex_unlock(&bpf_verifier_lock); 26189 clear_insn_aux_data(env, 0, env->prog->len); 26190 vfree(env->insn_aux_data); 26191 err_free_env: 26192 bpf_stack_liveness_free(env); 26193 kvfree(env->cfg.insn_postorder); 26194 kvfree(env->scc_info); 26195 kvfree(env->succ); 26196 kvfree(env->gotox_tmp_buf); 26197 kvfree(env); 26198 return ret; 26199 } 26200