// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 * Copyright (c) 2016 Facebook
 * Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
 */
#include <uapi/linux/btf.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/bpf_verifier.h>
#include <linux/filter.h>
#include <net/netlink.h>
#include <linux/file.h>
#include <linux/vmalloc.h>
#include <linux/stringify.h>
#include <linux/bsearch.h>
#include <linux/sort.h>
#include <linux/perf_event.h>
#include <linux/ctype.h>
#include <linux/error-injection.h>
#include <linux/bpf_lsm.h>
#include <linux/btf_ids.h>

#include "disasm.h"

static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
	[_id] = & _name ## _verifier_ops,
#define BPF_MAP_TYPE(_id, _ops)
#define BPF_LINK_TYPE(_id, _name)
#include <linux/bpf_types.h>
#undef BPF_PROG_TYPE
#undef BPF_MAP_TYPE
#undef BPF_LINK_TYPE
};

/* bpf_check() is a static code analyzer that walks eBPF program
 * instruction by instruction and updates register/stack state.
 * All paths of conditional branches are analyzed until 'bpf_exit' insn.
 *
 * The first pass is depth-first-search to check that the program is a DAG.
 * It rejects the following programs:
 * - larger than BPF_MAXINSNS insns
 * - if loop is present (detected via back-edge)
 * - unreachable insns exist (shouldn't be a forest. program = one function)
 * - out of bounds or malformed jumps
 * The second pass is all possible path descent from the 1st insn.
 * Since it's analyzing all paths through the program, the length of the
 * analysis is limited to 64k insn, which may be hit even if total number of
 * insn is less than 4K, but there are too many branches that change stack/regs.
 * Number of 'branches to be analyzed' is limited to 1k
 *
 * On entry to each instruction, each register has a type, and the instruction
 * changes the types of the registers depending on instruction semantics.
 * If instruction is BPF_MOV64_REG(BPF_REG_1, BPF_REG_5), then type of R5 is
 * copied to R1.
 *
 * All registers are 64-bit.
 * R0 - return register
 * R1-R5 argument passing registers
 * R6-R9 callee saved registers
 * R10 - frame pointer read-only
 *
 * At the start of BPF program the register R1 contains a pointer to bpf_context
 * and has type PTR_TO_CTX.
 *
 * Verifier tracks arithmetic operations on pointers in case:
 *    BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
 *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -20),
 * 1st insn copies R10 (which has FRAME_PTR) type into R1
 * and 2nd arithmetic instruction is pattern matched to recognize
 * that it wants to construct a pointer to some element within stack.
 * So after 2nd insn, the register R1 has type PTR_TO_STACK
 * (and -20 constant is saved for further stack bounds checking).
 * Meaning that this reg is a pointer to stack plus known immediate constant.
 *
 * Most of the time the registers have SCALAR_VALUE type, which
 * means the register has some value, but it's not a valid pointer.
 * (like pointer plus pointer becomes SCALAR_VALUE type)
 *
 * When verifier sees load or store instructions the type of base register
 * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK, PTR_TO_SOCKET. These are
 * four pointer types recognized by check_mem_access() function.
 *
 * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
 * and the range of [ptr, ptr + map's value_size) is accessible.
 *
 * registers used to pass values to function calls are checked against
 * function argument constraints.
 *
 * ARG_PTR_TO_MAP_KEY is one of such argument constraints.
 * It means that the register type passed to this function must be
 * PTR_TO_STACK and it will be used inside the function as
 * 'pointer to map element key'
 *
 * For example the argument constraints for bpf_map_lookup_elem():
 *   .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
 *   .arg1_type = ARG_CONST_MAP_PTR,
 *   .arg2_type = ARG_PTR_TO_MAP_KEY,
 *
 * ret_type says that this function returns 'pointer to map elem value or null'
 * function expects 1st argument to be a const pointer to 'struct bpf_map' and
 * 2nd argument should be a pointer to stack, which will be used inside
 * the helper function as a pointer to map element key.
 *
 * On the kernel side the helper function looks like:
 * u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
 * {
 *    struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
 *    void *key = (void *) (unsigned long) r2;
 *    void *value;
 *
 *    here kernel can access 'key' and 'map' pointers safely, knowing that
 *    [key, key + map->key_size) bytes are valid and were initialized on
 *    the stack of eBPF program.
 * }
 *
 * Corresponding eBPF program may look like:
 *    BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),  // after this insn R2 type is FRAME_PTR
 *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), // after this insn R2 type is PTR_TO_STACK
 *    BPF_LD_MAP_FD(BPF_REG_1, map_fd),      // after this insn R1 type is CONST_PTR_TO_MAP
 *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
 * here verifier looks at prototype of map_lookup_elem() and sees:
 * .arg1_type == ARG_CONST_MAP_PTR and R1->type == CONST_PTR_TO_MAP, which is ok,
 * Now verifier knows that this map has key of R1->map_ptr->key_size bytes
 *
 * Then .arg2_type == ARG_PTR_TO_MAP_KEY and R2->type == PTR_TO_STACK, ok so far,
 * Now verifier checks that [R2, R2 + map's key_size) are within stack limits
 * and were initialized prior to this call.
 * If it's ok, then verifier allows this BPF_CALL insn and looks at
 * .ret_type which is RET_PTR_TO_MAP_VALUE_OR_NULL, so it sets
 * R0->type = PTR_TO_MAP_VALUE_OR_NULL which means bpf_map_lookup_elem() function
 * returns either pointer to map value or NULL.
 *
 * When type PTR_TO_MAP_VALUE_OR_NULL passes through 'if (reg != 0) goto +off'
 * insn, the register holding that pointer in the true branch changes state to
 * PTR_TO_MAP_VALUE and the same register changes state to CONST_IMM in the false
 * branch. See check_cond_jmp_op().
 *
 * After the call R0 is set to return type of the function and registers R1-R5
 * are set to NOT_INIT to indicate that they are no longer readable.
 *
 * The following reference types represent a potential reference to a kernel
 * resource which, after first being allocated, must be checked and freed by
 * the BPF program:
 * - PTR_TO_SOCKET_OR_NULL, PTR_TO_SOCKET
 *
 * When the verifier sees a helper call return a reference type, it allocates a
 * pointer id for the reference and stores it in the current function state.
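 *
 * As an illustrative sketch (not taken from this file; the helpers are the
 * usual ones, the surrounding program and its locals are made up), a program
 * using such a reference-returning helper is expected to look like:
 *
 *    struct bpf_sock *sk;
 *
 *    sk = bpf_sk_lookup_tcp(ctx, &tuple, sizeof(tuple.ipv4),
 *                           BPF_F_CURRENT_NETNS, 0);
 *    if (!sk)             // NULL check: the non-NULL branch sees PTR_TO_SOCKET,
 *        return 0;        // the NULL branch implicitly drops the reference
 *    ...                  // sk may be dereferenced and passed to helpers here
 *    bpf_sk_release(sk);  // reference must be released before program exit
 *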
 * Similar to the way that PTR_TO_MAP_VALUE_OR_NULL is converted into
 * PTR_TO_MAP_VALUE, PTR_TO_SOCKET_OR_NULL becomes PTR_TO_SOCKET when the type
 * passes through a NULL-check conditional. For the branch wherein the state is
 * changed to CONST_IMM, the verifier releases the reference.
 *
 * For each helper function that allocates a reference, such as
 * bpf_sk_lookup_tcp(), there is a corresponding release function, such as
 * bpf_sk_release(). When a reference type passes into the release function,
 * the verifier also releases the reference. If any unchecked or unreleased
 * reference remains at the end of the program, the verifier rejects it.
 */

/* verifier_state + insn_idx are pushed to stack when branch is encountered */
struct bpf_verifier_stack_elem {
	/* verifier state is 'st'
	 * before processing instruction 'insn_idx'
	 * and after processing instruction 'prev_insn_idx'
	 */
	struct bpf_verifier_state st;
	int insn_idx;
	int prev_insn_idx;
	struct bpf_verifier_stack_elem *next;
	/* length of verifier log at the time this state was pushed on stack */
	u32 log_pos;
};

#define BPF_COMPLEXITY_LIMIT_JMP_SEQ	8192
#define BPF_COMPLEXITY_LIMIT_STATES	64

#define BPF_MAP_KEY_POISON	(1ULL << 63)
#define BPF_MAP_KEY_SEEN	(1ULL << 62)

#define BPF_MAP_PTR_UNPRIV	1UL
#define BPF_MAP_PTR_POISON	((void *)((0xeB9FUL << 1) +	\
					  POISON_POINTER_DELTA))
#define BPF_MAP_PTR(X)		((struct bpf_map *)((X) & ~BPF_MAP_PTR_UNPRIV))

static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux)
{
	return BPF_MAP_PTR(aux->map_ptr_state) == BPF_MAP_PTR_POISON;
}

static bool bpf_map_ptr_unpriv(const struct bpf_insn_aux_data *aux)
{
	return aux->map_ptr_state & BPF_MAP_PTR_UNPRIV;
}

static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux,
			      const struct bpf_map *map, bool unpriv)
{
	BUILD_BUG_ON((unsigned long)BPF_MAP_PTR_POISON & BPF_MAP_PTR_UNPRIV);
	unpriv |= bpf_map_ptr_unpriv(aux);
	aux->map_ptr_state = (unsigned long)map |
			     (unpriv ? BPF_MAP_PTR_UNPRIV : 0UL);
}

static bool bpf_map_key_poisoned(const struct bpf_insn_aux_data *aux)
{
	return aux->map_key_state & BPF_MAP_KEY_POISON;
}

static bool bpf_map_key_unseen(const struct bpf_insn_aux_data *aux)
{
	return !(aux->map_key_state & BPF_MAP_KEY_SEEN);
}

static u64 bpf_map_key_immediate(const struct bpf_insn_aux_data *aux)
{
	return aux->map_key_state & ~(BPF_MAP_KEY_SEEN | BPF_MAP_KEY_POISON);
}

static void bpf_map_key_store(struct bpf_insn_aux_data *aux, u64 state)
{
	bool poisoned = bpf_map_key_poisoned(aux);

	aux->map_key_state = state | BPF_MAP_KEY_SEEN |
			     (poisoned ?
BPF_MAP_KEY_POISON : 0ULL); 229 } 230 231 struct bpf_call_arg_meta { 232 struct bpf_map *map_ptr; 233 bool raw_mode; 234 bool pkt_access; 235 int regno; 236 int access_size; 237 int mem_size; 238 u64 msize_max_value; 239 int ref_obj_id; 240 int func_id; 241 u32 btf_id; 242 }; 243 244 struct btf *btf_vmlinux; 245 246 static DEFINE_MUTEX(bpf_verifier_lock); 247 248 static const struct bpf_line_info * 249 find_linfo(const struct bpf_verifier_env *env, u32 insn_off) 250 { 251 const struct bpf_line_info *linfo; 252 const struct bpf_prog *prog; 253 u32 i, nr_linfo; 254 255 prog = env->prog; 256 nr_linfo = prog->aux->nr_linfo; 257 258 if (!nr_linfo || insn_off >= prog->len) 259 return NULL; 260 261 linfo = prog->aux->linfo; 262 for (i = 1; i < nr_linfo; i++) 263 if (insn_off < linfo[i].insn_off) 264 break; 265 266 return &linfo[i - 1]; 267 } 268 269 void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt, 270 va_list args) 271 { 272 unsigned int n; 273 274 n = vscnprintf(log->kbuf, BPF_VERIFIER_TMP_LOG_SIZE, fmt, args); 275 276 WARN_ONCE(n >= BPF_VERIFIER_TMP_LOG_SIZE - 1, 277 "verifier log line truncated - local buffer too short\n"); 278 279 n = min(log->len_total - log->len_used - 1, n); 280 log->kbuf[n] = '\0'; 281 282 if (log->level == BPF_LOG_KERNEL) { 283 pr_err("BPF:%s\n", log->kbuf); 284 return; 285 } 286 if (!copy_to_user(log->ubuf + log->len_used, log->kbuf, n + 1)) 287 log->len_used += n; 288 else 289 log->ubuf = NULL; 290 } 291 292 static void bpf_vlog_reset(struct bpf_verifier_log *log, u32 new_pos) 293 { 294 char zero = 0; 295 296 if (!bpf_verifier_log_needed(log)) 297 return; 298 299 log->len_used = new_pos; 300 if (put_user(zero, log->ubuf + new_pos)) 301 log->ubuf = NULL; 302 } 303 304 /* log_level controls verbosity level of eBPF verifier. 305 * bpf_verifier_log_write() is used to dump the verification trace to the log, 306 * so the user can figure out what's wrong with the program 307 */ 308 __printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env, 309 const char *fmt, ...) 310 { 311 va_list args; 312 313 if (!bpf_verifier_log_needed(&env->log)) 314 return; 315 316 va_start(args, fmt); 317 bpf_verifier_vlog(&env->log, fmt, args); 318 va_end(args); 319 } 320 EXPORT_SYMBOL_GPL(bpf_verifier_log_write); 321 322 __printf(2, 3) static void verbose(void *private_data, const char *fmt, ...) 323 { 324 struct bpf_verifier_env *env = private_data; 325 va_list args; 326 327 if (!bpf_verifier_log_needed(&env->log)) 328 return; 329 330 va_start(args, fmt); 331 bpf_verifier_vlog(&env->log, fmt, args); 332 va_end(args); 333 } 334 335 __printf(2, 3) void bpf_log(struct bpf_verifier_log *log, 336 const char *fmt, ...) 337 { 338 va_list args; 339 340 if (!bpf_verifier_log_needed(log)) 341 return; 342 343 va_start(args, fmt); 344 bpf_verifier_vlog(log, fmt, args); 345 va_end(args); 346 } 347 348 static const char *ltrim(const char *s) 349 { 350 while (isspace(*s)) 351 s++; 352 353 return s; 354 } 355 356 __printf(3, 4) static void verbose_linfo(struct bpf_verifier_env *env, 357 u32 insn_off, 358 const char *prefix_fmt, ...) 
359 { 360 const struct bpf_line_info *linfo; 361 362 if (!bpf_verifier_log_needed(&env->log)) 363 return; 364 365 linfo = find_linfo(env, insn_off); 366 if (!linfo || linfo == env->prev_linfo) 367 return; 368 369 if (prefix_fmt) { 370 va_list args; 371 372 va_start(args, prefix_fmt); 373 bpf_verifier_vlog(&env->log, prefix_fmt, args); 374 va_end(args); 375 } 376 377 verbose(env, "%s\n", 378 ltrim(btf_name_by_offset(env->prog->aux->btf, 379 linfo->line_off))); 380 381 env->prev_linfo = linfo; 382 } 383 384 static bool type_is_pkt_pointer(enum bpf_reg_type type) 385 { 386 return type == PTR_TO_PACKET || 387 type == PTR_TO_PACKET_META; 388 } 389 390 static bool type_is_sk_pointer(enum bpf_reg_type type) 391 { 392 return type == PTR_TO_SOCKET || 393 type == PTR_TO_SOCK_COMMON || 394 type == PTR_TO_TCP_SOCK || 395 type == PTR_TO_XDP_SOCK; 396 } 397 398 static bool reg_type_not_null(enum bpf_reg_type type) 399 { 400 return type == PTR_TO_SOCKET || 401 type == PTR_TO_TCP_SOCK || 402 type == PTR_TO_MAP_VALUE || 403 type == PTR_TO_SOCK_COMMON; 404 } 405 406 static bool reg_type_may_be_null(enum bpf_reg_type type) 407 { 408 return type == PTR_TO_MAP_VALUE_OR_NULL || 409 type == PTR_TO_SOCKET_OR_NULL || 410 type == PTR_TO_SOCK_COMMON_OR_NULL || 411 type == PTR_TO_TCP_SOCK_OR_NULL || 412 type == PTR_TO_BTF_ID_OR_NULL || 413 type == PTR_TO_MEM_OR_NULL || 414 type == PTR_TO_RDONLY_BUF_OR_NULL || 415 type == PTR_TO_RDWR_BUF_OR_NULL; 416 } 417 418 static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg) 419 { 420 return reg->type == PTR_TO_MAP_VALUE && 421 map_value_has_spin_lock(reg->map_ptr); 422 } 423 424 static bool reg_type_may_be_refcounted_or_null(enum bpf_reg_type type) 425 { 426 return type == PTR_TO_SOCKET || 427 type == PTR_TO_SOCKET_OR_NULL || 428 type == PTR_TO_TCP_SOCK || 429 type == PTR_TO_TCP_SOCK_OR_NULL || 430 type == PTR_TO_MEM || 431 type == PTR_TO_MEM_OR_NULL; 432 } 433 434 static bool arg_type_may_be_refcounted(enum bpf_arg_type type) 435 { 436 return type == ARG_PTR_TO_SOCK_COMMON; 437 } 438 439 /* Determine whether the function releases some resources allocated by another 440 * function call. The first reference type argument will be assumed to be 441 * released by release_reference(). 442 */ 443 static bool is_release_function(enum bpf_func_id func_id) 444 { 445 return func_id == BPF_FUNC_sk_release || 446 func_id == BPF_FUNC_ringbuf_submit || 447 func_id == BPF_FUNC_ringbuf_discard; 448 } 449 450 static bool may_be_acquire_function(enum bpf_func_id func_id) 451 { 452 return func_id == BPF_FUNC_sk_lookup_tcp || 453 func_id == BPF_FUNC_sk_lookup_udp || 454 func_id == BPF_FUNC_skc_lookup_tcp || 455 func_id == BPF_FUNC_map_lookup_elem || 456 func_id == BPF_FUNC_ringbuf_reserve; 457 } 458 459 static bool is_acquire_function(enum bpf_func_id func_id, 460 const struct bpf_map *map) 461 { 462 enum bpf_map_type map_type = map ? 
map->map_type : BPF_MAP_TYPE_UNSPEC; 463 464 if (func_id == BPF_FUNC_sk_lookup_tcp || 465 func_id == BPF_FUNC_sk_lookup_udp || 466 func_id == BPF_FUNC_skc_lookup_tcp || 467 func_id == BPF_FUNC_ringbuf_reserve) 468 return true; 469 470 if (func_id == BPF_FUNC_map_lookup_elem && 471 (map_type == BPF_MAP_TYPE_SOCKMAP || 472 map_type == BPF_MAP_TYPE_SOCKHASH)) 473 return true; 474 475 return false; 476 } 477 478 static bool is_ptr_cast_function(enum bpf_func_id func_id) 479 { 480 return func_id == BPF_FUNC_tcp_sock || 481 func_id == BPF_FUNC_sk_fullsock; 482 } 483 484 /* string representation of 'enum bpf_reg_type' */ 485 static const char * const reg_type_str[] = { 486 [NOT_INIT] = "?", 487 [SCALAR_VALUE] = "inv", 488 [PTR_TO_CTX] = "ctx", 489 [CONST_PTR_TO_MAP] = "map_ptr", 490 [PTR_TO_MAP_VALUE] = "map_value", 491 [PTR_TO_MAP_VALUE_OR_NULL] = "map_value_or_null", 492 [PTR_TO_STACK] = "fp", 493 [PTR_TO_PACKET] = "pkt", 494 [PTR_TO_PACKET_META] = "pkt_meta", 495 [PTR_TO_PACKET_END] = "pkt_end", 496 [PTR_TO_FLOW_KEYS] = "flow_keys", 497 [PTR_TO_SOCKET] = "sock", 498 [PTR_TO_SOCKET_OR_NULL] = "sock_or_null", 499 [PTR_TO_SOCK_COMMON] = "sock_common", 500 [PTR_TO_SOCK_COMMON_OR_NULL] = "sock_common_or_null", 501 [PTR_TO_TCP_SOCK] = "tcp_sock", 502 [PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null", 503 [PTR_TO_TP_BUFFER] = "tp_buffer", 504 [PTR_TO_XDP_SOCK] = "xdp_sock", 505 [PTR_TO_BTF_ID] = "ptr_", 506 [PTR_TO_BTF_ID_OR_NULL] = "ptr_or_null_", 507 [PTR_TO_MEM] = "mem", 508 [PTR_TO_MEM_OR_NULL] = "mem_or_null", 509 [PTR_TO_RDONLY_BUF] = "rdonly_buf", 510 [PTR_TO_RDONLY_BUF_OR_NULL] = "rdonly_buf_or_null", 511 [PTR_TO_RDWR_BUF] = "rdwr_buf", 512 [PTR_TO_RDWR_BUF_OR_NULL] = "rdwr_buf_or_null", 513 }; 514 515 static char slot_type_char[] = { 516 [STACK_INVALID] = '?', 517 [STACK_SPILL] = 'r', 518 [STACK_MISC] = 'm', 519 [STACK_ZERO] = '0', 520 }; 521 522 static void print_liveness(struct bpf_verifier_env *env, 523 enum bpf_reg_liveness live) 524 { 525 if (live & (REG_LIVE_READ | REG_LIVE_WRITTEN | REG_LIVE_DONE)) 526 verbose(env, "_"); 527 if (live & REG_LIVE_READ) 528 verbose(env, "r"); 529 if (live & REG_LIVE_WRITTEN) 530 verbose(env, "w"); 531 if (live & REG_LIVE_DONE) 532 verbose(env, "D"); 533 } 534 535 static struct bpf_func_state *func(struct bpf_verifier_env *env, 536 const struct bpf_reg_state *reg) 537 { 538 struct bpf_verifier_state *cur = env->cur_state; 539 540 return cur->frame[reg->frameno]; 541 } 542 543 const char *kernel_type_name(u32 id) 544 { 545 return btf_name_by_offset(btf_vmlinux, 546 btf_type_by_id(btf_vmlinux, id)->name_off); 547 } 548 549 static void print_verifier_state(struct bpf_verifier_env *env, 550 const struct bpf_func_state *state) 551 { 552 const struct bpf_reg_state *reg; 553 enum bpf_reg_type t; 554 int i; 555 556 if (state->frameno) 557 verbose(env, " frame%d:", state->frameno); 558 for (i = 0; i < MAX_BPF_REG; i++) { 559 reg = &state->regs[i]; 560 t = reg->type; 561 if (t == NOT_INIT) 562 continue; 563 verbose(env, " R%d", i); 564 print_liveness(env, reg->live); 565 verbose(env, "=%s", reg_type_str[t]); 566 if (t == SCALAR_VALUE && reg->precise) 567 verbose(env, "P"); 568 if ((t == SCALAR_VALUE || t == PTR_TO_STACK) && 569 tnum_is_const(reg->var_off)) { 570 /* reg->off should be 0 for SCALAR_VALUE */ 571 verbose(env, "%lld", reg->var_off.value + reg->off); 572 } else { 573 if (t == PTR_TO_BTF_ID || t == PTR_TO_BTF_ID_OR_NULL) 574 verbose(env, "%s", kernel_type_name(reg->btf_id)); 575 verbose(env, "(id=%d", reg->id); 576 if (reg_type_may_be_refcounted_or_null(t)) 
577 verbose(env, ",ref_obj_id=%d", reg->ref_obj_id); 578 if (t != SCALAR_VALUE) 579 verbose(env, ",off=%d", reg->off); 580 if (type_is_pkt_pointer(t)) 581 verbose(env, ",r=%d", reg->range); 582 else if (t == CONST_PTR_TO_MAP || 583 t == PTR_TO_MAP_VALUE || 584 t == PTR_TO_MAP_VALUE_OR_NULL) 585 verbose(env, ",ks=%d,vs=%d", 586 reg->map_ptr->key_size, 587 reg->map_ptr->value_size); 588 if (tnum_is_const(reg->var_off)) { 589 /* Typically an immediate SCALAR_VALUE, but 590 * could be a pointer whose offset is too big 591 * for reg->off 592 */ 593 verbose(env, ",imm=%llx", reg->var_off.value); 594 } else { 595 if (reg->smin_value != reg->umin_value && 596 reg->smin_value != S64_MIN) 597 verbose(env, ",smin_value=%lld", 598 (long long)reg->smin_value); 599 if (reg->smax_value != reg->umax_value && 600 reg->smax_value != S64_MAX) 601 verbose(env, ",smax_value=%lld", 602 (long long)reg->smax_value); 603 if (reg->umin_value != 0) 604 verbose(env, ",umin_value=%llu", 605 (unsigned long long)reg->umin_value); 606 if (reg->umax_value != U64_MAX) 607 verbose(env, ",umax_value=%llu", 608 (unsigned long long)reg->umax_value); 609 if (!tnum_is_unknown(reg->var_off)) { 610 char tn_buf[48]; 611 612 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); 613 verbose(env, ",var_off=%s", tn_buf); 614 } 615 if (reg->s32_min_value != reg->smin_value && 616 reg->s32_min_value != S32_MIN) 617 verbose(env, ",s32_min_value=%d", 618 (int)(reg->s32_min_value)); 619 if (reg->s32_max_value != reg->smax_value && 620 reg->s32_max_value != S32_MAX) 621 verbose(env, ",s32_max_value=%d", 622 (int)(reg->s32_max_value)); 623 if (reg->u32_min_value != reg->umin_value && 624 reg->u32_min_value != U32_MIN) 625 verbose(env, ",u32_min_value=%d", 626 (int)(reg->u32_min_value)); 627 if (reg->u32_max_value != reg->umax_value && 628 reg->u32_max_value != U32_MAX) 629 verbose(env, ",u32_max_value=%d", 630 (int)(reg->u32_max_value)); 631 } 632 verbose(env, ")"); 633 } 634 } 635 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) { 636 char types_buf[BPF_REG_SIZE + 1]; 637 bool valid = false; 638 int j; 639 640 for (j = 0; j < BPF_REG_SIZE; j++) { 641 if (state->stack[i].slot_type[j] != STACK_INVALID) 642 valid = true; 643 types_buf[j] = slot_type_char[ 644 state->stack[i].slot_type[j]]; 645 } 646 types_buf[BPF_REG_SIZE] = 0; 647 if (!valid) 648 continue; 649 verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE); 650 print_liveness(env, state->stack[i].spilled_ptr.live); 651 if (state->stack[i].slot_type[0] == STACK_SPILL) { 652 reg = &state->stack[i].spilled_ptr; 653 t = reg->type; 654 verbose(env, "=%s", reg_type_str[t]); 655 if (t == SCALAR_VALUE && reg->precise) 656 verbose(env, "P"); 657 if (t == SCALAR_VALUE && tnum_is_const(reg->var_off)) 658 verbose(env, "%lld", reg->var_off.value + reg->off); 659 } else { 660 verbose(env, "=%s", types_buf); 661 } 662 } 663 if (state->acquired_refs && state->refs[0].id) { 664 verbose(env, " refs=%d", state->refs[0].id); 665 for (i = 1; i < state->acquired_refs; i++) 666 if (state->refs[i].id) 667 verbose(env, ",%d", state->refs[i].id); 668 } 669 verbose(env, "\n"); 670 } 671 672 #define COPY_STATE_FN(NAME, COUNT, FIELD, SIZE) \ 673 static int copy_##NAME##_state(struct bpf_func_state *dst, \ 674 const struct bpf_func_state *src) \ 675 { \ 676 if (!src->FIELD) \ 677 return 0; \ 678 if (WARN_ON_ONCE(dst->COUNT < src->COUNT)) { \ 679 /* internal bug, make state invalid to reject the program */ \ 680 memset(dst, 0, sizeof(*dst)); \ 681 return -EFAULT; \ 682 } \ 683 memcpy(dst->FIELD, src->FIELD, \ 684 
sizeof(*src->FIELD) * (src->COUNT / SIZE)); \ 685 return 0; \ 686 } 687 /* copy_reference_state() */ 688 COPY_STATE_FN(reference, acquired_refs, refs, 1) 689 /* copy_stack_state() */ 690 COPY_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE) 691 #undef COPY_STATE_FN 692 693 #define REALLOC_STATE_FN(NAME, COUNT, FIELD, SIZE) \ 694 static int realloc_##NAME##_state(struct bpf_func_state *state, int size, \ 695 bool copy_old) \ 696 { \ 697 u32 old_size = state->COUNT; \ 698 struct bpf_##NAME##_state *new_##FIELD; \ 699 int slot = size / SIZE; \ 700 \ 701 if (size <= old_size || !size) { \ 702 if (copy_old) \ 703 return 0; \ 704 state->COUNT = slot * SIZE; \ 705 if (!size && old_size) { \ 706 kfree(state->FIELD); \ 707 state->FIELD = NULL; \ 708 } \ 709 return 0; \ 710 } \ 711 new_##FIELD = kmalloc_array(slot, sizeof(struct bpf_##NAME##_state), \ 712 GFP_KERNEL); \ 713 if (!new_##FIELD) \ 714 return -ENOMEM; \ 715 if (copy_old) { \ 716 if (state->FIELD) \ 717 memcpy(new_##FIELD, state->FIELD, \ 718 sizeof(*new_##FIELD) * (old_size / SIZE)); \ 719 memset(new_##FIELD + old_size / SIZE, 0, \ 720 sizeof(*new_##FIELD) * (size - old_size) / SIZE); \ 721 } \ 722 state->COUNT = slot * SIZE; \ 723 kfree(state->FIELD); \ 724 state->FIELD = new_##FIELD; \ 725 return 0; \ 726 } 727 /* realloc_reference_state() */ 728 REALLOC_STATE_FN(reference, acquired_refs, refs, 1) 729 /* realloc_stack_state() */ 730 REALLOC_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE) 731 #undef REALLOC_STATE_FN 732 733 /* do_check() starts with zero-sized stack in struct bpf_verifier_state to 734 * make it consume minimal amount of memory. check_stack_write() access from 735 * the program calls into realloc_func_state() to grow the stack size. 736 * Note there is a non-zero 'parent' pointer inside bpf_verifier_state 737 * which realloc_stack_state() copies over. It points to previous 738 * bpf_verifier_state which is never reallocated. 739 */ 740 static int realloc_func_state(struct bpf_func_state *state, int stack_size, 741 int refs_size, bool copy_old) 742 { 743 int err = realloc_reference_state(state, refs_size, copy_old); 744 if (err) 745 return err; 746 return realloc_stack_state(state, stack_size, copy_old); 747 } 748 749 /* Acquire a pointer id from the env and update the state->refs to include 750 * this new pointer reference. 751 * On success, returns a valid pointer id to associate with the register 752 * On failure, returns a negative errno. 753 */ 754 static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx) 755 { 756 struct bpf_func_state *state = cur_func(env); 757 int new_ofs = state->acquired_refs; 758 int id, err; 759 760 err = realloc_reference_state(state, state->acquired_refs + 1, true); 761 if (err) 762 return err; 763 id = ++env->id_gen; 764 state->refs[new_ofs].id = id; 765 state->refs[new_ofs].insn_idx = insn_idx; 766 767 return id; 768 } 769 770 /* release function corresponding to acquire_reference_state(). Idempotent. 
*/ 771 static int release_reference_state(struct bpf_func_state *state, int ptr_id) 772 { 773 int i, last_idx; 774 775 last_idx = state->acquired_refs - 1; 776 for (i = 0; i < state->acquired_refs; i++) { 777 if (state->refs[i].id == ptr_id) { 778 if (last_idx && i != last_idx) 779 memcpy(&state->refs[i], &state->refs[last_idx], 780 sizeof(*state->refs)); 781 memset(&state->refs[last_idx], 0, sizeof(*state->refs)); 782 state->acquired_refs--; 783 return 0; 784 } 785 } 786 return -EINVAL; 787 } 788 789 static int transfer_reference_state(struct bpf_func_state *dst, 790 struct bpf_func_state *src) 791 { 792 int err = realloc_reference_state(dst, src->acquired_refs, false); 793 if (err) 794 return err; 795 err = copy_reference_state(dst, src); 796 if (err) 797 return err; 798 return 0; 799 } 800 801 static void free_func_state(struct bpf_func_state *state) 802 { 803 if (!state) 804 return; 805 kfree(state->refs); 806 kfree(state->stack); 807 kfree(state); 808 } 809 810 static void clear_jmp_history(struct bpf_verifier_state *state) 811 { 812 kfree(state->jmp_history); 813 state->jmp_history = NULL; 814 state->jmp_history_cnt = 0; 815 } 816 817 static void free_verifier_state(struct bpf_verifier_state *state, 818 bool free_self) 819 { 820 int i; 821 822 for (i = 0; i <= state->curframe; i++) { 823 free_func_state(state->frame[i]); 824 state->frame[i] = NULL; 825 } 826 clear_jmp_history(state); 827 if (free_self) 828 kfree(state); 829 } 830 831 /* copy verifier state from src to dst growing dst stack space 832 * when necessary to accommodate larger src stack 833 */ 834 static int copy_func_state(struct bpf_func_state *dst, 835 const struct bpf_func_state *src) 836 { 837 int err; 838 839 err = realloc_func_state(dst, src->allocated_stack, src->acquired_refs, 840 false); 841 if (err) 842 return err; 843 memcpy(dst, src, offsetof(struct bpf_func_state, acquired_refs)); 844 err = copy_reference_state(dst, src); 845 if (err) 846 return err; 847 return copy_stack_state(dst, src); 848 } 849 850 static int copy_verifier_state(struct bpf_verifier_state *dst_state, 851 const struct bpf_verifier_state *src) 852 { 853 struct bpf_func_state *dst; 854 u32 jmp_sz = sizeof(struct bpf_idx_pair) * src->jmp_history_cnt; 855 int i, err; 856 857 if (dst_state->jmp_history_cnt < src->jmp_history_cnt) { 858 kfree(dst_state->jmp_history); 859 dst_state->jmp_history = kmalloc(jmp_sz, GFP_USER); 860 if (!dst_state->jmp_history) 861 return -ENOMEM; 862 } 863 memcpy(dst_state->jmp_history, src->jmp_history, jmp_sz); 864 dst_state->jmp_history_cnt = src->jmp_history_cnt; 865 866 /* if dst has more stack frames then src frame, free them */ 867 for (i = src->curframe + 1; i <= dst_state->curframe; i++) { 868 free_func_state(dst_state->frame[i]); 869 dst_state->frame[i] = NULL; 870 } 871 dst_state->speculative = src->speculative; 872 dst_state->curframe = src->curframe; 873 dst_state->active_spin_lock = src->active_spin_lock; 874 dst_state->branches = src->branches; 875 dst_state->parent = src->parent; 876 dst_state->first_insn_idx = src->first_insn_idx; 877 dst_state->last_insn_idx = src->last_insn_idx; 878 for (i = 0; i <= src->curframe; i++) { 879 dst = dst_state->frame[i]; 880 if (!dst) { 881 dst = kzalloc(sizeof(*dst), GFP_KERNEL); 882 if (!dst) 883 return -ENOMEM; 884 dst_state->frame[i] = dst; 885 } 886 err = copy_func_state(dst, src->frame[i]); 887 if (err) 888 return err; 889 } 890 return 0; 891 } 892 893 static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st) 894 { 895 while 
(st) { 896 u32 br = --st->branches; 897 898 /* WARN_ON(br > 1) technically makes sense here, 899 * but see comment in push_stack(), hence: 900 */ 901 WARN_ONCE((int)br < 0, 902 "BUG update_branch_counts:branches_to_explore=%d\n", 903 br); 904 if (br) 905 break; 906 st = st->parent; 907 } 908 } 909 910 static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx, 911 int *insn_idx, bool pop_log) 912 { 913 struct bpf_verifier_state *cur = env->cur_state; 914 struct bpf_verifier_stack_elem *elem, *head = env->head; 915 int err; 916 917 if (env->head == NULL) 918 return -ENOENT; 919 920 if (cur) { 921 err = copy_verifier_state(cur, &head->st); 922 if (err) 923 return err; 924 } 925 if (pop_log) 926 bpf_vlog_reset(&env->log, head->log_pos); 927 if (insn_idx) 928 *insn_idx = head->insn_idx; 929 if (prev_insn_idx) 930 *prev_insn_idx = head->prev_insn_idx; 931 elem = head->next; 932 free_verifier_state(&head->st, false); 933 kfree(head); 934 env->head = elem; 935 env->stack_size--; 936 return 0; 937 } 938 939 static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env, 940 int insn_idx, int prev_insn_idx, 941 bool speculative) 942 { 943 struct bpf_verifier_state *cur = env->cur_state; 944 struct bpf_verifier_stack_elem *elem; 945 int err; 946 947 elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL); 948 if (!elem) 949 goto err; 950 951 elem->insn_idx = insn_idx; 952 elem->prev_insn_idx = prev_insn_idx; 953 elem->next = env->head; 954 elem->log_pos = env->log.len_used; 955 env->head = elem; 956 env->stack_size++; 957 err = copy_verifier_state(&elem->st, cur); 958 if (err) 959 goto err; 960 elem->st.speculative |= speculative; 961 if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) { 962 verbose(env, "The sequence of %d jumps is too complex.\n", 963 env->stack_size); 964 goto err; 965 } 966 if (elem->st.parent) { 967 ++elem->st.parent->branches; 968 /* WARN_ON(branches > 2) technically makes sense here, 969 * but 970 * 1. speculative states will bump 'branches' for non-branch 971 * instructions 972 * 2. is_state_visited() heuristics may decide not to create 973 * a new state for a sequence of branches and all such current 974 * and cloned states will be pointing to a single parent state 975 * which might have large 'branches' count. 976 */ 977 } 978 return &elem->st; 979 err: 980 free_verifier_state(env->cur_state, true); 981 env->cur_state = NULL; 982 /* pop all elements and return */ 983 while (!pop_stack(env, NULL, NULL, false)); 984 return NULL; 985 } 986 987 #define CALLER_SAVED_REGS 6 988 static const int caller_saved[CALLER_SAVED_REGS] = { 989 BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5 990 }; 991 992 static void __mark_reg_not_init(const struct bpf_verifier_env *env, 993 struct bpf_reg_state *reg); 994 995 /* Mark the unknown part of a register (variable offset or scalar value) as 996 * known to have the value @imm. 
997 */ 998 static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm) 999 { 1000 /* Clear id, off, and union(map_ptr, range) */ 1001 memset(((u8 *)reg) + sizeof(reg->type), 0, 1002 offsetof(struct bpf_reg_state, var_off) - sizeof(reg->type)); 1003 reg->var_off = tnum_const(imm); 1004 reg->smin_value = (s64)imm; 1005 reg->smax_value = (s64)imm; 1006 reg->umin_value = imm; 1007 reg->umax_value = imm; 1008 1009 reg->s32_min_value = (s32)imm; 1010 reg->s32_max_value = (s32)imm; 1011 reg->u32_min_value = (u32)imm; 1012 reg->u32_max_value = (u32)imm; 1013 } 1014 1015 static void __mark_reg32_known(struct bpf_reg_state *reg, u64 imm) 1016 { 1017 reg->var_off = tnum_const_subreg(reg->var_off, imm); 1018 reg->s32_min_value = (s32)imm; 1019 reg->s32_max_value = (s32)imm; 1020 reg->u32_min_value = (u32)imm; 1021 reg->u32_max_value = (u32)imm; 1022 } 1023 1024 /* Mark the 'variable offset' part of a register as zero. This should be 1025 * used only on registers holding a pointer type. 1026 */ 1027 static void __mark_reg_known_zero(struct bpf_reg_state *reg) 1028 { 1029 __mark_reg_known(reg, 0); 1030 } 1031 1032 static void __mark_reg_const_zero(struct bpf_reg_state *reg) 1033 { 1034 __mark_reg_known(reg, 0); 1035 reg->type = SCALAR_VALUE; 1036 } 1037 1038 static void mark_reg_known_zero(struct bpf_verifier_env *env, 1039 struct bpf_reg_state *regs, u32 regno) 1040 { 1041 if (WARN_ON(regno >= MAX_BPF_REG)) { 1042 verbose(env, "mark_reg_known_zero(regs, %u)\n", regno); 1043 /* Something bad happened, let's kill all regs */ 1044 for (regno = 0; regno < MAX_BPF_REG; regno++) 1045 __mark_reg_not_init(env, regs + regno); 1046 return; 1047 } 1048 __mark_reg_known_zero(regs + regno); 1049 } 1050 1051 static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg) 1052 { 1053 return type_is_pkt_pointer(reg->type); 1054 } 1055 1056 static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg) 1057 { 1058 return reg_is_pkt_pointer(reg) || 1059 reg->type == PTR_TO_PACKET_END; 1060 } 1061 1062 /* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. */ 1063 static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg, 1064 enum bpf_reg_type which) 1065 { 1066 /* The register can already have a range from prior markings. 1067 * This is fine as long as it hasn't been advanced from its 1068 * origin. 
1069 */ 1070 return reg->type == which && 1071 reg->id == 0 && 1072 reg->off == 0 && 1073 tnum_equals_const(reg->var_off, 0); 1074 } 1075 1076 /* Reset the min/max bounds of a register */ 1077 static void __mark_reg_unbounded(struct bpf_reg_state *reg) 1078 { 1079 reg->smin_value = S64_MIN; 1080 reg->smax_value = S64_MAX; 1081 reg->umin_value = 0; 1082 reg->umax_value = U64_MAX; 1083 1084 reg->s32_min_value = S32_MIN; 1085 reg->s32_max_value = S32_MAX; 1086 reg->u32_min_value = 0; 1087 reg->u32_max_value = U32_MAX; 1088 } 1089 1090 static void __mark_reg64_unbounded(struct bpf_reg_state *reg) 1091 { 1092 reg->smin_value = S64_MIN; 1093 reg->smax_value = S64_MAX; 1094 reg->umin_value = 0; 1095 reg->umax_value = U64_MAX; 1096 } 1097 1098 static void __mark_reg32_unbounded(struct bpf_reg_state *reg) 1099 { 1100 reg->s32_min_value = S32_MIN; 1101 reg->s32_max_value = S32_MAX; 1102 reg->u32_min_value = 0; 1103 reg->u32_max_value = U32_MAX; 1104 } 1105 1106 static void __update_reg32_bounds(struct bpf_reg_state *reg) 1107 { 1108 struct tnum var32_off = tnum_subreg(reg->var_off); 1109 1110 /* min signed is max(sign bit) | min(other bits) */ 1111 reg->s32_min_value = max_t(s32, reg->s32_min_value, 1112 var32_off.value | (var32_off.mask & S32_MIN)); 1113 /* max signed is min(sign bit) | max(other bits) */ 1114 reg->s32_max_value = min_t(s32, reg->s32_max_value, 1115 var32_off.value | (var32_off.mask & S32_MAX)); 1116 reg->u32_min_value = max_t(u32, reg->u32_min_value, (u32)var32_off.value); 1117 reg->u32_max_value = min(reg->u32_max_value, 1118 (u32)(var32_off.value | var32_off.mask)); 1119 } 1120 1121 static void __update_reg64_bounds(struct bpf_reg_state *reg) 1122 { 1123 /* min signed is max(sign bit) | min(other bits) */ 1124 reg->smin_value = max_t(s64, reg->smin_value, 1125 reg->var_off.value | (reg->var_off.mask & S64_MIN)); 1126 /* max signed is min(sign bit) | max(other bits) */ 1127 reg->smax_value = min_t(s64, reg->smax_value, 1128 reg->var_off.value | (reg->var_off.mask & S64_MAX)); 1129 reg->umin_value = max(reg->umin_value, reg->var_off.value); 1130 reg->umax_value = min(reg->umax_value, 1131 reg->var_off.value | reg->var_off.mask); 1132 } 1133 1134 static void __update_reg_bounds(struct bpf_reg_state *reg) 1135 { 1136 __update_reg32_bounds(reg); 1137 __update_reg64_bounds(reg); 1138 } 1139 1140 /* Uses signed min/max values to inform unsigned, and vice-versa */ 1141 static void __reg32_deduce_bounds(struct bpf_reg_state *reg) 1142 { 1143 /* Learn sign from signed bounds. 1144 * If we cannot cross the sign boundary, then signed and unsigned bounds 1145 * are the same, so combine. This works even in the negative case, e.g. 1146 * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff. 1147 */ 1148 if (reg->s32_min_value >= 0 || reg->s32_max_value < 0) { 1149 reg->s32_min_value = reg->u32_min_value = 1150 max_t(u32, reg->s32_min_value, reg->u32_min_value); 1151 reg->s32_max_value = reg->u32_max_value = 1152 min_t(u32, reg->s32_max_value, reg->u32_max_value); 1153 return; 1154 } 1155 /* Learn sign from unsigned bounds. Signed bounds cross the sign 1156 * boundary, so we must be careful. 1157 */ 1158 if ((s32)reg->u32_max_value >= 0) { 1159 /* Positive. We can't learn anything from the smin, but smax 1160 * is positive, hence safe. 1161 */ 1162 reg->s32_min_value = reg->u32_min_value; 1163 reg->s32_max_value = reg->u32_max_value = 1164 min_t(u32, reg->s32_max_value, reg->u32_max_value); 1165 } else if ((s32)reg->u32_min_value < 0) { 1166 /* Negative. 
We can't learn anything from the smax, but smin 1167 * is negative, hence safe. 1168 */ 1169 reg->s32_min_value = reg->u32_min_value = 1170 max_t(u32, reg->s32_min_value, reg->u32_min_value); 1171 reg->s32_max_value = reg->u32_max_value; 1172 } 1173 } 1174 1175 static void __reg64_deduce_bounds(struct bpf_reg_state *reg) 1176 { 1177 /* Learn sign from signed bounds. 1178 * If we cannot cross the sign boundary, then signed and unsigned bounds 1179 * are the same, so combine. This works even in the negative case, e.g. 1180 * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff. 1181 */ 1182 if (reg->smin_value >= 0 || reg->smax_value < 0) { 1183 reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value, 1184 reg->umin_value); 1185 reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value, 1186 reg->umax_value); 1187 return; 1188 } 1189 /* Learn sign from unsigned bounds. Signed bounds cross the sign 1190 * boundary, so we must be careful. 1191 */ 1192 if ((s64)reg->umax_value >= 0) { 1193 /* Positive. We can't learn anything from the smin, but smax 1194 * is positive, hence safe. 1195 */ 1196 reg->smin_value = reg->umin_value; 1197 reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value, 1198 reg->umax_value); 1199 } else if ((s64)reg->umin_value < 0) { 1200 /* Negative. We can't learn anything from the smax, but smin 1201 * is negative, hence safe. 1202 */ 1203 reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value, 1204 reg->umin_value); 1205 reg->smax_value = reg->umax_value; 1206 } 1207 } 1208 1209 static void __reg_deduce_bounds(struct bpf_reg_state *reg) 1210 { 1211 __reg32_deduce_bounds(reg); 1212 __reg64_deduce_bounds(reg); 1213 } 1214 1215 /* Attempts to improve var_off based on unsigned min/max information */ 1216 static void __reg_bound_offset(struct bpf_reg_state *reg) 1217 { 1218 struct tnum var64_off = tnum_intersect(reg->var_off, 1219 tnum_range(reg->umin_value, 1220 reg->umax_value)); 1221 struct tnum var32_off = tnum_intersect(tnum_subreg(reg->var_off), 1222 tnum_range(reg->u32_min_value, 1223 reg->u32_max_value)); 1224 1225 reg->var_off = tnum_or(tnum_clear_subreg(var64_off), var32_off); 1226 } 1227 1228 static void __reg_assign_32_into_64(struct bpf_reg_state *reg) 1229 { 1230 reg->umin_value = reg->u32_min_value; 1231 reg->umax_value = reg->u32_max_value; 1232 /* Attempt to pull 32-bit signed bounds into 64-bit bounds 1233 * but must be positive otherwise set to worse case bounds 1234 * and refine later from tnum. 1235 */ 1236 if (reg->s32_min_value >= 0 && reg->s32_max_value >= 0) 1237 reg->smax_value = reg->s32_max_value; 1238 else 1239 reg->smax_value = U32_MAX; 1240 if (reg->s32_min_value >= 0) 1241 reg->smin_value = reg->s32_min_value; 1242 else 1243 reg->smin_value = 0; 1244 } 1245 1246 static void __reg_combine_32_into_64(struct bpf_reg_state *reg) 1247 { 1248 /* special case when 64-bit register has upper 32-bit register 1249 * zeroed. Typically happens after zext or <<32, >>32 sequence 1250 * allowing us to use 32-bit bounds directly, 1251 */ 1252 if (tnum_equals_const(tnum_clear_subreg(reg->var_off), 0)) { 1253 __reg_assign_32_into_64(reg); 1254 } else { 1255 /* Otherwise the best we can do is push lower 32bit known and 1256 * unknown bits into register (var_off set from jmp logic) 1257 * then learn as much as possible from the 64-bit tnum 1258 * known and unknown bits. The previous smin/smax bounds are 1259 * invalid here because of jmp32 compare so mark them unknown 1260 * so they do not impact tnum bounds calculation. 
1261 */ 1262 __mark_reg64_unbounded(reg); 1263 __update_reg_bounds(reg); 1264 } 1265 1266 /* Intersecting with the old var_off might have improved our bounds 1267 * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc), 1268 * then new var_off is (0; 0x7f...fc) which improves our umax. 1269 */ 1270 __reg_deduce_bounds(reg); 1271 __reg_bound_offset(reg); 1272 __update_reg_bounds(reg); 1273 } 1274 1275 static bool __reg64_bound_s32(s64 a) 1276 { 1277 if (a > S32_MIN && a < S32_MAX) 1278 return true; 1279 return false; 1280 } 1281 1282 static bool __reg64_bound_u32(u64 a) 1283 { 1284 if (a > U32_MIN && a < U32_MAX) 1285 return true; 1286 return false; 1287 } 1288 1289 static void __reg_combine_64_into_32(struct bpf_reg_state *reg) 1290 { 1291 __mark_reg32_unbounded(reg); 1292 1293 if (__reg64_bound_s32(reg->smin_value)) 1294 reg->s32_min_value = (s32)reg->smin_value; 1295 if (__reg64_bound_s32(reg->smax_value)) 1296 reg->s32_max_value = (s32)reg->smax_value; 1297 if (__reg64_bound_u32(reg->umin_value)) 1298 reg->u32_min_value = (u32)reg->umin_value; 1299 if (__reg64_bound_u32(reg->umax_value)) 1300 reg->u32_max_value = (u32)reg->umax_value; 1301 1302 /* Intersecting with the old var_off might have improved our bounds 1303 * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc), 1304 * then new var_off is (0; 0x7f...fc) which improves our umax. 1305 */ 1306 __reg_deduce_bounds(reg); 1307 __reg_bound_offset(reg); 1308 __update_reg_bounds(reg); 1309 } 1310 1311 /* Mark a register as having a completely unknown (scalar) value. */ 1312 static void __mark_reg_unknown(const struct bpf_verifier_env *env, 1313 struct bpf_reg_state *reg) 1314 { 1315 /* 1316 * Clear type, id, off, and union(map_ptr, range) and 1317 * padding between 'type' and union 1318 */ 1319 memset(reg, 0, offsetof(struct bpf_reg_state, var_off)); 1320 reg->type = SCALAR_VALUE; 1321 reg->var_off = tnum_unknown; 1322 reg->frameno = 0; 1323 reg->precise = env->subprog_cnt > 1 || !env->bpf_capable; 1324 __mark_reg_unbounded(reg); 1325 } 1326 1327 static void mark_reg_unknown(struct bpf_verifier_env *env, 1328 struct bpf_reg_state *regs, u32 regno) 1329 { 1330 if (WARN_ON(regno >= MAX_BPF_REG)) { 1331 verbose(env, "mark_reg_unknown(regs, %u)\n", regno); 1332 /* Something bad happened, let's kill all regs except FP */ 1333 for (regno = 0; regno < BPF_REG_FP; regno++) 1334 __mark_reg_not_init(env, regs + regno); 1335 return; 1336 } 1337 __mark_reg_unknown(env, regs + regno); 1338 } 1339 1340 static void __mark_reg_not_init(const struct bpf_verifier_env *env, 1341 struct bpf_reg_state *reg) 1342 { 1343 __mark_reg_unknown(env, reg); 1344 reg->type = NOT_INIT; 1345 } 1346 1347 static void mark_reg_not_init(struct bpf_verifier_env *env, 1348 struct bpf_reg_state *regs, u32 regno) 1349 { 1350 if (WARN_ON(regno >= MAX_BPF_REG)) { 1351 verbose(env, "mark_reg_not_init(regs, %u)\n", regno); 1352 /* Something bad happened, let's kill all regs except FP */ 1353 for (regno = 0; regno < BPF_REG_FP; regno++) 1354 __mark_reg_not_init(env, regs + regno); 1355 return; 1356 } 1357 __mark_reg_not_init(env, regs + regno); 1358 } 1359 1360 static void mark_btf_ld_reg(struct bpf_verifier_env *env, 1361 struct bpf_reg_state *regs, u32 regno, 1362 enum bpf_reg_type reg_type, u32 btf_id) 1363 { 1364 if (reg_type == SCALAR_VALUE) { 1365 mark_reg_unknown(env, regs, regno); 1366 return; 1367 } 1368 mark_reg_known_zero(env, regs, regno); 1369 regs[regno].type = PTR_TO_BTF_ID; 1370 regs[regno].btf_id = btf_id; 1371 } 1372 1373 #define 
DEF_NOT_SUBREG (0) 1374 static void init_reg_state(struct bpf_verifier_env *env, 1375 struct bpf_func_state *state) 1376 { 1377 struct bpf_reg_state *regs = state->regs; 1378 int i; 1379 1380 for (i = 0; i < MAX_BPF_REG; i++) { 1381 mark_reg_not_init(env, regs, i); 1382 regs[i].live = REG_LIVE_NONE; 1383 regs[i].parent = NULL; 1384 regs[i].subreg_def = DEF_NOT_SUBREG; 1385 } 1386 1387 /* frame pointer */ 1388 regs[BPF_REG_FP].type = PTR_TO_STACK; 1389 mark_reg_known_zero(env, regs, BPF_REG_FP); 1390 regs[BPF_REG_FP].frameno = state->frameno; 1391 } 1392 1393 #define BPF_MAIN_FUNC (-1) 1394 static void init_func_state(struct bpf_verifier_env *env, 1395 struct bpf_func_state *state, 1396 int callsite, int frameno, int subprogno) 1397 { 1398 state->callsite = callsite; 1399 state->frameno = frameno; 1400 state->subprogno = subprogno; 1401 init_reg_state(env, state); 1402 } 1403 1404 enum reg_arg_type { 1405 SRC_OP, /* register is used as source operand */ 1406 DST_OP, /* register is used as destination operand */ 1407 DST_OP_NO_MARK /* same as above, check only, don't mark */ 1408 }; 1409 1410 static int cmp_subprogs(const void *a, const void *b) 1411 { 1412 return ((struct bpf_subprog_info *)a)->start - 1413 ((struct bpf_subprog_info *)b)->start; 1414 } 1415 1416 static int find_subprog(struct bpf_verifier_env *env, int off) 1417 { 1418 struct bpf_subprog_info *p; 1419 1420 p = bsearch(&off, env->subprog_info, env->subprog_cnt, 1421 sizeof(env->subprog_info[0]), cmp_subprogs); 1422 if (!p) 1423 return -ENOENT; 1424 return p - env->subprog_info; 1425 1426 } 1427 1428 static int add_subprog(struct bpf_verifier_env *env, int off) 1429 { 1430 int insn_cnt = env->prog->len; 1431 int ret; 1432 1433 if (off >= insn_cnt || off < 0) { 1434 verbose(env, "call to invalid destination\n"); 1435 return -EINVAL; 1436 } 1437 ret = find_subprog(env, off); 1438 if (ret >= 0) 1439 return 0; 1440 if (env->subprog_cnt >= BPF_MAX_SUBPROGS) { 1441 verbose(env, "too many subprograms\n"); 1442 return -E2BIG; 1443 } 1444 env->subprog_info[env->subprog_cnt++].start = off; 1445 sort(env->subprog_info, env->subprog_cnt, 1446 sizeof(env->subprog_info[0]), cmp_subprogs, NULL); 1447 return 0; 1448 } 1449 1450 static int check_subprogs(struct bpf_verifier_env *env) 1451 { 1452 int i, ret, subprog_start, subprog_end, off, cur_subprog = 0; 1453 struct bpf_subprog_info *subprog = env->subprog_info; 1454 struct bpf_insn *insn = env->prog->insnsi; 1455 int insn_cnt = env->prog->len; 1456 1457 /* Add entry function. */ 1458 ret = add_subprog(env, 0); 1459 if (ret < 0) 1460 return ret; 1461 1462 /* determine subprog starts. The end is one before the next starts */ 1463 for (i = 0; i < insn_cnt; i++) { 1464 if (insn[i].code != (BPF_JMP | BPF_CALL)) 1465 continue; 1466 if (insn[i].src_reg != BPF_PSEUDO_CALL) 1467 continue; 1468 if (!env->bpf_capable) { 1469 verbose(env, 1470 "function calls to other bpf functions are allowed for CAP_BPF and CAP_SYS_ADMIN\n"); 1471 return -EPERM; 1472 } 1473 ret = add_subprog(env, i + insn[i].imm + 1); 1474 if (ret < 0) 1475 return ret; 1476 } 1477 1478 /* Add a fake 'exit' subprog which could simplify subprog iteration 1479 * logic. 'subprog_cnt' should not be increased. 
1480 */ 1481 subprog[env->subprog_cnt].start = insn_cnt; 1482 1483 if (env->log.level & BPF_LOG_LEVEL2) 1484 for (i = 0; i < env->subprog_cnt; i++) 1485 verbose(env, "func#%d @%d\n", i, subprog[i].start); 1486 1487 /* now check that all jumps are within the same subprog */ 1488 subprog_start = subprog[cur_subprog].start; 1489 subprog_end = subprog[cur_subprog + 1].start; 1490 for (i = 0; i < insn_cnt; i++) { 1491 u8 code = insn[i].code; 1492 1493 if (BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32) 1494 goto next; 1495 if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL) 1496 goto next; 1497 off = i + insn[i].off + 1; 1498 if (off < subprog_start || off >= subprog_end) { 1499 verbose(env, "jump out of range from insn %d to %d\n", i, off); 1500 return -EINVAL; 1501 } 1502 next: 1503 if (i == subprog_end - 1) { 1504 /* to avoid fall-through from one subprog into another 1505 * the last insn of the subprog should be either exit 1506 * or unconditional jump back 1507 */ 1508 if (code != (BPF_JMP | BPF_EXIT) && 1509 code != (BPF_JMP | BPF_JA)) { 1510 verbose(env, "last insn is not an exit or jmp\n"); 1511 return -EINVAL; 1512 } 1513 subprog_start = subprog_end; 1514 cur_subprog++; 1515 if (cur_subprog < env->subprog_cnt) 1516 subprog_end = subprog[cur_subprog + 1].start; 1517 } 1518 } 1519 return 0; 1520 } 1521 1522 /* Parentage chain of this register (or stack slot) should take care of all 1523 * issues like callee-saved registers, stack slot allocation time, etc. 1524 */ 1525 static int mark_reg_read(struct bpf_verifier_env *env, 1526 const struct bpf_reg_state *state, 1527 struct bpf_reg_state *parent, u8 flag) 1528 { 1529 bool writes = parent == state->parent; /* Observe write marks */ 1530 int cnt = 0; 1531 1532 while (parent) { 1533 /* if read wasn't screened by an earlier write ... */ 1534 if (writes && state->live & REG_LIVE_WRITTEN) 1535 break; 1536 if (parent->live & REG_LIVE_DONE) { 1537 verbose(env, "verifier BUG type %s var_off %lld off %d\n", 1538 reg_type_str[parent->type], 1539 parent->var_off.value, parent->off); 1540 return -EFAULT; 1541 } 1542 /* The first condition is more likely to be true than the 1543 * second, checked it first. 1544 */ 1545 if ((parent->live & REG_LIVE_READ) == flag || 1546 parent->live & REG_LIVE_READ64) 1547 /* The parentage chain never changes and 1548 * this parent was already marked as LIVE_READ. 1549 * There is no need to keep walking the chain again and 1550 * keep re-marking all parents as LIVE_READ. 1551 * This case happens when the same register is read 1552 * multiple times without writes into it in-between. 1553 * Also, if parent has the stronger REG_LIVE_READ64 set, 1554 * then no need to set the weak REG_LIVE_READ32. 1555 */ 1556 break; 1557 /* ... then we depend on parent's value */ 1558 parent->live |= flag; 1559 /* REG_LIVE_READ64 overrides REG_LIVE_READ32. */ 1560 if (flag == REG_LIVE_READ64) 1561 parent->live &= ~REG_LIVE_READ32; 1562 state = parent; 1563 parent = state->parent; 1564 writes = true; 1565 cnt++; 1566 } 1567 1568 if (env->longest_mark_read_walk < cnt) 1569 env->longest_mark_read_walk = cnt; 1570 return 0; 1571 } 1572 1573 /* This function is supposed to be used by the following 32-bit optimization 1574 * code only. It returns TRUE if the source or destination register operates 1575 * on 64-bit, otherwise return FALSE. 
1576 */ 1577 static bool is_reg64(struct bpf_verifier_env *env, struct bpf_insn *insn, 1578 u32 regno, struct bpf_reg_state *reg, enum reg_arg_type t) 1579 { 1580 u8 code, class, op; 1581 1582 code = insn->code; 1583 class = BPF_CLASS(code); 1584 op = BPF_OP(code); 1585 if (class == BPF_JMP) { 1586 /* BPF_EXIT for "main" will reach here. Return TRUE 1587 * conservatively. 1588 */ 1589 if (op == BPF_EXIT) 1590 return true; 1591 if (op == BPF_CALL) { 1592 /* BPF to BPF call will reach here because of marking 1593 * caller saved clobber with DST_OP_NO_MARK for which we 1594 * don't care the register def because they are anyway 1595 * marked as NOT_INIT already. 1596 */ 1597 if (insn->src_reg == BPF_PSEUDO_CALL) 1598 return false; 1599 /* Helper call will reach here because of arg type 1600 * check, conservatively return TRUE. 1601 */ 1602 if (t == SRC_OP) 1603 return true; 1604 1605 return false; 1606 } 1607 } 1608 1609 if (class == BPF_ALU64 || class == BPF_JMP || 1610 /* BPF_END always use BPF_ALU class. */ 1611 (class == BPF_ALU && op == BPF_END && insn->imm == 64)) 1612 return true; 1613 1614 if (class == BPF_ALU || class == BPF_JMP32) 1615 return false; 1616 1617 if (class == BPF_LDX) { 1618 if (t != SRC_OP) 1619 return BPF_SIZE(code) == BPF_DW; 1620 /* LDX source must be ptr. */ 1621 return true; 1622 } 1623 1624 if (class == BPF_STX) { 1625 if (reg->type != SCALAR_VALUE) 1626 return true; 1627 return BPF_SIZE(code) == BPF_DW; 1628 } 1629 1630 if (class == BPF_LD) { 1631 u8 mode = BPF_MODE(code); 1632 1633 /* LD_IMM64 */ 1634 if (mode == BPF_IMM) 1635 return true; 1636 1637 /* Both LD_IND and LD_ABS return 32-bit data. */ 1638 if (t != SRC_OP) 1639 return false; 1640 1641 /* Implicit ctx ptr. */ 1642 if (regno == BPF_REG_6) 1643 return true; 1644 1645 /* Explicit source could be any width. */ 1646 return true; 1647 } 1648 1649 if (class == BPF_ST) 1650 /* The only source register for BPF_ST is a ptr. */ 1651 return true; 1652 1653 /* Conservatively return true at default. */ 1654 return true; 1655 } 1656 1657 /* Return TRUE if INSN doesn't have explicit value define. */ 1658 static bool insn_no_def(struct bpf_insn *insn) 1659 { 1660 u8 class = BPF_CLASS(insn->code); 1661 1662 return (class == BPF_JMP || class == BPF_JMP32 || 1663 class == BPF_STX || class == BPF_ST); 1664 } 1665 1666 /* Return TRUE if INSN has defined any 32-bit value explicitly. */ 1667 static bool insn_has_def32(struct bpf_verifier_env *env, struct bpf_insn *insn) 1668 { 1669 if (insn_no_def(insn)) 1670 return false; 1671 1672 return !is_reg64(env, insn, insn->dst_reg, NULL, DST_OP); 1673 } 1674 1675 static void mark_insn_zext(struct bpf_verifier_env *env, 1676 struct bpf_reg_state *reg) 1677 { 1678 s32 def_idx = reg->subreg_def; 1679 1680 if (def_idx == DEF_NOT_SUBREG) 1681 return; 1682 1683 env->insn_aux_data[def_idx - 1].zext_dst = true; 1684 /* The dst will be zero extended, so won't be sub-register anymore. 
*/ 1685 reg->subreg_def = DEF_NOT_SUBREG; 1686 } 1687 1688 static int check_reg_arg(struct bpf_verifier_env *env, u32 regno, 1689 enum reg_arg_type t) 1690 { 1691 struct bpf_verifier_state *vstate = env->cur_state; 1692 struct bpf_func_state *state = vstate->frame[vstate->curframe]; 1693 struct bpf_insn *insn = env->prog->insnsi + env->insn_idx; 1694 struct bpf_reg_state *reg, *regs = state->regs; 1695 bool rw64; 1696 1697 if (regno >= MAX_BPF_REG) { 1698 verbose(env, "R%d is invalid\n", regno); 1699 return -EINVAL; 1700 } 1701 1702 reg = ®s[regno]; 1703 rw64 = is_reg64(env, insn, regno, reg, t); 1704 if (t == SRC_OP) { 1705 /* check whether register used as source operand can be read */ 1706 if (reg->type == NOT_INIT) { 1707 verbose(env, "R%d !read_ok\n", regno); 1708 return -EACCES; 1709 } 1710 /* We don't need to worry about FP liveness because it's read-only */ 1711 if (regno == BPF_REG_FP) 1712 return 0; 1713 1714 if (rw64) 1715 mark_insn_zext(env, reg); 1716 1717 return mark_reg_read(env, reg, reg->parent, 1718 rw64 ? REG_LIVE_READ64 : REG_LIVE_READ32); 1719 } else { 1720 /* check whether register used as dest operand can be written to */ 1721 if (regno == BPF_REG_FP) { 1722 verbose(env, "frame pointer is read only\n"); 1723 return -EACCES; 1724 } 1725 reg->live |= REG_LIVE_WRITTEN; 1726 reg->subreg_def = rw64 ? DEF_NOT_SUBREG : env->insn_idx + 1; 1727 if (t == DST_OP) 1728 mark_reg_unknown(env, regs, regno); 1729 } 1730 return 0; 1731 } 1732 1733 /* for any branch, call, exit record the history of jmps in the given state */ 1734 static int push_jmp_history(struct bpf_verifier_env *env, 1735 struct bpf_verifier_state *cur) 1736 { 1737 u32 cnt = cur->jmp_history_cnt; 1738 struct bpf_idx_pair *p; 1739 1740 cnt++; 1741 p = krealloc(cur->jmp_history, cnt * sizeof(*p), GFP_USER); 1742 if (!p) 1743 return -ENOMEM; 1744 p[cnt - 1].idx = env->insn_idx; 1745 p[cnt - 1].prev_idx = env->prev_insn_idx; 1746 cur->jmp_history = p; 1747 cur->jmp_history_cnt = cnt; 1748 return 0; 1749 } 1750 1751 /* Backtrack one insn at a time. If idx is not at the top of recorded 1752 * history then previous instruction came from straight line execution. 1753 */ 1754 static int get_prev_insn_idx(struct bpf_verifier_state *st, int i, 1755 u32 *history) 1756 { 1757 u32 cnt = *history; 1758 1759 if (cnt && st->jmp_history[cnt - 1].idx == i) { 1760 i = st->jmp_history[cnt - 1].prev_idx; 1761 (*history)--; 1762 } else { 1763 i--; 1764 } 1765 return i; 1766 } 1767 1768 /* For given verifier state backtrack_insn() is called from the last insn to 1769 * the first insn. Its purpose is to compute a bitmask of registers and 1770 * stack slots that needs precision in the parent verifier state. 
1771 */ 1772 static int backtrack_insn(struct bpf_verifier_env *env, int idx, 1773 u32 *reg_mask, u64 *stack_mask) 1774 { 1775 const struct bpf_insn_cbs cbs = { 1776 .cb_print = verbose, 1777 .private_data = env, 1778 }; 1779 struct bpf_insn *insn = env->prog->insnsi + idx; 1780 u8 class = BPF_CLASS(insn->code); 1781 u8 opcode = BPF_OP(insn->code); 1782 u8 mode = BPF_MODE(insn->code); 1783 u32 dreg = 1u << insn->dst_reg; 1784 u32 sreg = 1u << insn->src_reg; 1785 u32 spi; 1786 1787 if (insn->code == 0) 1788 return 0; 1789 if (env->log.level & BPF_LOG_LEVEL) { 1790 verbose(env, "regs=%x stack=%llx before ", *reg_mask, *stack_mask); 1791 verbose(env, "%d: ", idx); 1792 print_bpf_insn(&cbs, insn, env->allow_ptr_leaks); 1793 } 1794 1795 if (class == BPF_ALU || class == BPF_ALU64) { 1796 if (!(*reg_mask & dreg)) 1797 return 0; 1798 if (opcode == BPF_MOV) { 1799 if (BPF_SRC(insn->code) == BPF_X) { 1800 /* dreg = sreg 1801 * dreg needs precision after this insn 1802 * sreg needs precision before this insn 1803 */ 1804 *reg_mask &= ~dreg; 1805 *reg_mask |= sreg; 1806 } else { 1807 /* dreg = K 1808 * dreg needs precision after this insn. 1809 * Corresponding register is already marked 1810 * as precise=true in this verifier state. 1811 * No further markings in parent are necessary 1812 */ 1813 *reg_mask &= ~dreg; 1814 } 1815 } else { 1816 if (BPF_SRC(insn->code) == BPF_X) { 1817 /* dreg += sreg 1818 * both dreg and sreg need precision 1819 * before this insn 1820 */ 1821 *reg_mask |= sreg; 1822 } /* else dreg += K 1823 * dreg still needs precision before this insn 1824 */ 1825 } 1826 } else if (class == BPF_LDX) { 1827 if (!(*reg_mask & dreg)) 1828 return 0; 1829 *reg_mask &= ~dreg; 1830 1831 /* scalars can only be spilled into stack w/o losing precision. 1832 * Load from any other memory can be zero extended. 1833 * The desire to keep that precision is already indicated 1834 * by 'precise' mark in corresponding register of this state. 1835 * No further tracking necessary. 1836 */ 1837 if (insn->src_reg != BPF_REG_FP) 1838 return 0; 1839 if (BPF_SIZE(insn->code) != BPF_DW) 1840 return 0; 1841 1842 /* dreg = *(u64 *)[fp - off] was a fill from the stack. 1843 * that [fp - off] slot contains scalar that needs to be 1844 * tracked with precision 1845 */ 1846 spi = (-insn->off - 1) / BPF_REG_SIZE; 1847 if (spi >= 64) { 1848 verbose(env, "BUG spi %d\n", spi); 1849 WARN_ONCE(1, "verifier backtracking bug"); 1850 return -EFAULT; 1851 } 1852 *stack_mask |= 1ull << spi; 1853 } else if (class == BPF_STX || class == BPF_ST) { 1854 if (*reg_mask & dreg) 1855 /* stx & st shouldn't be using _scalar_ dst_reg 1856 * to access memory. It means backtracking 1857 * encountered a case of pointer subtraction. 
1858 */ 1859 return -ENOTSUPP; 1860 /* scalars can only be spilled into stack */ 1861 if (insn->dst_reg != BPF_REG_FP) 1862 return 0; 1863 if (BPF_SIZE(insn->code) != BPF_DW) 1864 return 0; 1865 spi = (-insn->off - 1) / BPF_REG_SIZE; 1866 if (spi >= 64) { 1867 verbose(env, "BUG spi %d\n", spi); 1868 WARN_ONCE(1, "verifier backtracking bug"); 1869 return -EFAULT; 1870 } 1871 if (!(*stack_mask & (1ull << spi))) 1872 return 0; 1873 *stack_mask &= ~(1ull << spi); 1874 if (class == BPF_STX) 1875 *reg_mask |= sreg; 1876 } else if (class == BPF_JMP || class == BPF_JMP32) { 1877 if (opcode == BPF_CALL) { 1878 if (insn->src_reg == BPF_PSEUDO_CALL) 1879 return -ENOTSUPP; 1880 /* regular helper call sets R0 */ 1881 *reg_mask &= ~1; 1882 if (*reg_mask & 0x3f) { 1883 /* if backtracing was looking for registers R1-R5 1884 * they should have been found already. 1885 */ 1886 verbose(env, "BUG regs %x\n", *reg_mask); 1887 WARN_ONCE(1, "verifier backtracking bug"); 1888 return -EFAULT; 1889 } 1890 } else if (opcode == BPF_EXIT) { 1891 return -ENOTSUPP; 1892 } 1893 } else if (class == BPF_LD) { 1894 if (!(*reg_mask & dreg)) 1895 return 0; 1896 *reg_mask &= ~dreg; 1897 /* It's ld_imm64 or ld_abs or ld_ind. 1898 * For ld_imm64 no further tracking of precision 1899 * into parent is necessary 1900 */ 1901 if (mode == BPF_IND || mode == BPF_ABS) 1902 /* to be analyzed */ 1903 return -ENOTSUPP; 1904 } 1905 return 0; 1906 } 1907 1908 /* the scalar precision tracking algorithm: 1909 * . at the start all registers have precise=false. 1910 * . scalar ranges are tracked as normal through alu and jmp insns. 1911 * . once precise value of the scalar register is used in: 1912 * . ptr + scalar alu 1913 * . if (scalar cond K|scalar) 1914 * . helper_call(.., scalar, ...) where ARG_CONST is expected 1915 * backtrack through the verifier states and mark all registers and 1916 * stack slots with spilled constants that these scalar regisers 1917 * should be precise. 1918 * . during state pruning two registers (or spilled stack slots) 1919 * are equivalent if both are not precise. 1920 * 1921 * Note the verifier cannot simply walk register parentage chain, 1922 * since many different registers and stack slots could have been 1923 * used to compute single precise scalar. 1924 * 1925 * The approach of starting with precise=true for all registers and then 1926 * backtrack to mark a register as not precise when the verifier detects 1927 * that program doesn't care about specific value (e.g., when helper 1928 * takes register as ARG_ANYTHING parameter) is not safe. 1929 * 1930 * It's ok to walk single parentage chain of the verifier states. 1931 * It's possible that this backtracking will go all the way till 1st insn. 1932 * All other branches will be explored for needing precision later. 1933 * 1934 * The backtracking needs to deal with cases like: 1935 * R8=map_value(id=0,off=0,ks=4,vs=1952,imm=0) R9_w=map_value(id=0,off=40,ks=4,vs=1952,imm=0) 1936 * r9 -= r8 1937 * r5 = r9 1938 * if r5 > 0x79f goto pc+7 1939 * R5_w=inv(id=0,umax_value=1951,var_off=(0x0; 0x7ff)) 1940 * r5 += 1 1941 * ... 1942 * call bpf_perf_event_output#25 1943 * where .arg5_type = ARG_CONST_SIZE_OR_ZERO 1944 * 1945 * and this case: 1946 * r6 = 1 1947 * call foo // uses callee's r6 inside to compute r0 1948 * r0 += r6 1949 * if r0 == 0 goto 1950 * 1951 * to track above reg_mask/stack_mask needs to be independent for each frame. 
1952 *
1953 * Also if parent's curframe > frame where backtracking started,
1954 * the verifier needs to mark registers in both frames, otherwise callees
1955 * may incorrectly prune callers. This is similar to
1956 * commit 7640ead93924 ("bpf: verifier: make sure callees don't prune with caller differences")
1957 *
1958 * For now backtracking falls back into conservative marking.
1959 */
1960 static void mark_all_scalars_precise(struct bpf_verifier_env *env,
1961 struct bpf_verifier_state *st)
1962 {
1963 struct bpf_func_state *func;
1964 struct bpf_reg_state *reg;
1965 int i, j;
1966
1967 /* big hammer: mark all scalars precise in this path.
1968 * pop_stack may still get !precise scalars.
1969 */
1970 for (; st; st = st->parent)
1971 for (i = 0; i <= st->curframe; i++) {
1972 func = st->frame[i];
1973 for (j = 0; j < BPF_REG_FP; j++) {
1974 reg = &func->regs[j];
1975 if (reg->type != SCALAR_VALUE)
1976 continue;
1977 reg->precise = true;
1978 }
1979 for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
1980 if (func->stack[j].slot_type[0] != STACK_SPILL)
1981 continue;
1982 reg = &func->stack[j].spilled_ptr;
1983 if (reg->type != SCALAR_VALUE)
1984 continue;
1985 reg->precise = true;
1986 }
1987 }
1988 }
1989
1990 static int __mark_chain_precision(struct bpf_verifier_env *env, int regno,
1991 int spi)
1992 {
1993 struct bpf_verifier_state *st = env->cur_state;
1994 int first_idx = st->first_insn_idx;
1995 int last_idx = env->insn_idx;
1996 struct bpf_func_state *func;
1997 struct bpf_reg_state *reg;
1998 u32 reg_mask = regno >= 0 ? 1u << regno : 0;
1999 u64 stack_mask = spi >= 0 ? 1ull << spi : 0;
2000 bool skip_first = true;
2001 bool new_marks = false;
2002 int i, err;
2003
2004 if (!env->bpf_capable)
2005 return 0;
2006
2007 func = st->frame[st->curframe];
2008 if (regno >= 0) {
2009 reg = &func->regs[regno];
2010 if (reg->type != SCALAR_VALUE) {
2011 WARN_ONCE(1, "backtracing misuse");
2012 return -EFAULT;
2013 }
2014 if (!reg->precise)
2015 new_marks = true;
2016 else
2017 reg_mask = 0;
2018 reg->precise = true;
2019 }
2020
2021 while (spi >= 0) {
2022 if (func->stack[spi].slot_type[0] != STACK_SPILL) {
2023 stack_mask = 0;
2024 break;
2025 }
2026 reg = &func->stack[spi].spilled_ptr;
2027 if (reg->type != SCALAR_VALUE) {
2028 stack_mask = 0;
2029 break;
2030 }
2031 if (!reg->precise)
2032 new_marks = true;
2033 else
2034 stack_mask = 0;
2035 reg->precise = true;
2036 break;
2037 }
2038
2039 if (!new_marks)
2040 return 0;
2041 if (!reg_mask && !stack_mask)
2042 return 0;
2043 for (;;) {
2044 DECLARE_BITMAP(mask, 64);
2045 u32 history = st->jmp_history_cnt;
2046
2047 if (env->log.level & BPF_LOG_LEVEL)
2048 verbose(env, "last_idx %d first_idx %d\n", last_idx, first_idx);
2049 for (i = last_idx;;) {
2050 if (skip_first) {
2051 err = 0;
2052 skip_first = false;
2053 } else {
2054 err = backtrack_insn(env, i, &reg_mask, &stack_mask);
2055 }
2056 if (err == -ENOTSUPP) {
2057 mark_all_scalars_precise(env, st);
2058 return 0;
2059 } else if (err) {
2060 return err;
2061 }
2062 if (!reg_mask && !stack_mask)
2063 /* Found assignment(s) into tracked register in this state.
2064 * Since this state is already marked, just return.
2065 * Nothing to be tracked further in the parent state.
2066 */
2067 return 0;
2068 if (i == first_idx)
2069 break;
2070 i = get_prev_insn_idx(st, i, &history);
2071 if (i >= env->prog->len) {
2072 /* This can happen if backtracking reached insn 0
2073 * and there are still reg_mask or stack_mask
2074 * to backtrack.
2075 * It means the backtracking missed the spot where 2076 * particular register was initialized with a constant. 2077 */ 2078 verbose(env, "BUG backtracking idx %d\n", i); 2079 WARN_ONCE(1, "verifier backtracking bug"); 2080 return -EFAULT; 2081 } 2082 } 2083 st = st->parent; 2084 if (!st) 2085 break; 2086 2087 new_marks = false; 2088 func = st->frame[st->curframe]; 2089 bitmap_from_u64(mask, reg_mask); 2090 for_each_set_bit(i, mask, 32) { 2091 reg = &func->regs[i]; 2092 if (reg->type != SCALAR_VALUE) { 2093 reg_mask &= ~(1u << i); 2094 continue; 2095 } 2096 if (!reg->precise) 2097 new_marks = true; 2098 reg->precise = true; 2099 } 2100 2101 bitmap_from_u64(mask, stack_mask); 2102 for_each_set_bit(i, mask, 64) { 2103 if (i >= func->allocated_stack / BPF_REG_SIZE) { 2104 /* the sequence of instructions: 2105 * 2: (bf) r3 = r10 2106 * 3: (7b) *(u64 *)(r3 -8) = r0 2107 * 4: (79) r4 = *(u64 *)(r10 -8) 2108 * doesn't contain jmps. It's backtracked 2109 * as a single block. 2110 * During backtracking insn 3 is not recognized as 2111 * stack access, so at the end of backtracking 2112 * stack slot fp-8 is still marked in stack_mask. 2113 * However the parent state may not have accessed 2114 * fp-8 and it's "unallocated" stack space. 2115 * In such case fallback to conservative. 2116 */ 2117 mark_all_scalars_precise(env, st); 2118 return 0; 2119 } 2120 2121 if (func->stack[i].slot_type[0] != STACK_SPILL) { 2122 stack_mask &= ~(1ull << i); 2123 continue; 2124 } 2125 reg = &func->stack[i].spilled_ptr; 2126 if (reg->type != SCALAR_VALUE) { 2127 stack_mask &= ~(1ull << i); 2128 continue; 2129 } 2130 if (!reg->precise) 2131 new_marks = true; 2132 reg->precise = true; 2133 } 2134 if (env->log.level & BPF_LOG_LEVEL) { 2135 print_verifier_state(env, func); 2136 verbose(env, "parent %s regs=%x stack=%llx marks\n", 2137 new_marks ? "didn't have" : "already had", 2138 reg_mask, stack_mask); 2139 } 2140 2141 if (!reg_mask && !stack_mask) 2142 break; 2143 if (!new_marks) 2144 break; 2145 2146 last_idx = st->last_insn_idx; 2147 first_idx = st->first_insn_idx; 2148 } 2149 return 0; 2150 } 2151 2152 static int mark_chain_precision(struct bpf_verifier_env *env, int regno) 2153 { 2154 return __mark_chain_precision(env, regno, -1); 2155 } 2156 2157 static int mark_chain_precision_stack(struct bpf_verifier_env *env, int spi) 2158 { 2159 return __mark_chain_precision(env, -1, spi); 2160 } 2161 2162 static bool is_spillable_regtype(enum bpf_reg_type type) 2163 { 2164 switch (type) { 2165 case PTR_TO_MAP_VALUE: 2166 case PTR_TO_MAP_VALUE_OR_NULL: 2167 case PTR_TO_STACK: 2168 case PTR_TO_CTX: 2169 case PTR_TO_PACKET: 2170 case PTR_TO_PACKET_META: 2171 case PTR_TO_PACKET_END: 2172 case PTR_TO_FLOW_KEYS: 2173 case CONST_PTR_TO_MAP: 2174 case PTR_TO_SOCKET: 2175 case PTR_TO_SOCKET_OR_NULL: 2176 case PTR_TO_SOCK_COMMON: 2177 case PTR_TO_SOCK_COMMON_OR_NULL: 2178 case PTR_TO_TCP_SOCK: 2179 case PTR_TO_TCP_SOCK_OR_NULL: 2180 case PTR_TO_XDP_SOCK: 2181 case PTR_TO_BTF_ID: 2182 case PTR_TO_BTF_ID_OR_NULL: 2183 case PTR_TO_RDONLY_BUF: 2184 case PTR_TO_RDONLY_BUF_OR_NULL: 2185 case PTR_TO_RDWR_BUF: 2186 case PTR_TO_RDWR_BUF_OR_NULL: 2187 return true; 2188 default: 2189 return false; 2190 } 2191 } 2192 2193 /* Does this register contain a constant zero? 
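 * e.g. a SCALAR_VALUE right after "r1 = 0" qualifies, while a register
 * whose range merely includes zero does not.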
*/ 2194 static bool register_is_null(struct bpf_reg_state *reg) 2195 { 2196 return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0); 2197 } 2198 2199 static bool register_is_const(struct bpf_reg_state *reg) 2200 { 2201 return reg->type == SCALAR_VALUE && tnum_is_const(reg->var_off); 2202 } 2203 2204 static bool __is_pointer_value(bool allow_ptr_leaks, 2205 const struct bpf_reg_state *reg) 2206 { 2207 if (allow_ptr_leaks) 2208 return false; 2209 2210 return reg->type != SCALAR_VALUE; 2211 } 2212 2213 static void save_register_state(struct bpf_func_state *state, 2214 int spi, struct bpf_reg_state *reg) 2215 { 2216 int i; 2217 2218 state->stack[spi].spilled_ptr = *reg; 2219 state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN; 2220 2221 for (i = 0; i < BPF_REG_SIZE; i++) 2222 state->stack[spi].slot_type[i] = STACK_SPILL; 2223 } 2224 2225 /* check_stack_read/write functions track spill/fill of registers, 2226 * stack boundary and alignment are checked in check_mem_access() 2227 */ 2228 static int check_stack_write(struct bpf_verifier_env *env, 2229 struct bpf_func_state *state, /* func where register points to */ 2230 int off, int size, int value_regno, int insn_idx) 2231 { 2232 struct bpf_func_state *cur; /* state of the current function */ 2233 int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err; 2234 u32 dst_reg = env->prog->insnsi[insn_idx].dst_reg; 2235 struct bpf_reg_state *reg = NULL; 2236 2237 err = realloc_func_state(state, round_up(slot + 1, BPF_REG_SIZE), 2238 state->acquired_refs, true); 2239 if (err) 2240 return err; 2241 /* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0, 2242 * so it's aligned access and [off, off + size) are within stack limits 2243 */ 2244 if (!env->allow_ptr_leaks && 2245 state->stack[spi].slot_type[0] == STACK_SPILL && 2246 size != BPF_REG_SIZE) { 2247 verbose(env, "attempt to corrupt spilled pointer on stack\n"); 2248 return -EACCES; 2249 } 2250 2251 cur = env->cur_state->frame[env->cur_state->curframe]; 2252 if (value_regno >= 0) 2253 reg = &cur->regs[value_regno]; 2254 2255 if (reg && size == BPF_REG_SIZE && register_is_const(reg) && 2256 !register_is_null(reg) && env->bpf_capable) { 2257 if (dst_reg != BPF_REG_FP) { 2258 /* The backtracking logic can only recognize explicit 2259 * stack slot address like [fp - 8]. Other spill of 2260 * scalar via different register has to be conervative. 2261 * Backtrack from here and mark all registers as precise 2262 * that contributed into 'reg' being a constant. 
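 *
 * Hypothetical example:
 *   r3 = r10
 *   r3 += -8
 *   r0 = 5
 *   *(u64 *)(r3 + 0) = r0
 * The spill goes through r3 rather than fp, so precision for r0 (and
 * anything that fed it) has to be forced right here instead of being
 * recovered later by the backtracker.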
2263 */ 2264 err = mark_chain_precision(env, value_regno); 2265 if (err) 2266 return err; 2267 } 2268 save_register_state(state, spi, reg); 2269 } else if (reg && is_spillable_regtype(reg->type)) { 2270 /* register containing pointer is being spilled into stack */ 2271 if (size != BPF_REG_SIZE) { 2272 verbose_linfo(env, insn_idx, "; "); 2273 verbose(env, "invalid size of register spill\n"); 2274 return -EACCES; 2275 } 2276 2277 if (state != cur && reg->type == PTR_TO_STACK) { 2278 verbose(env, "cannot spill pointers to stack into stack frame of the caller\n"); 2279 return -EINVAL; 2280 } 2281 2282 if (!env->bypass_spec_v4) { 2283 bool sanitize = false; 2284 2285 if (state->stack[spi].slot_type[0] == STACK_SPILL && 2286 register_is_const(&state->stack[spi].spilled_ptr)) 2287 sanitize = true; 2288 for (i = 0; i < BPF_REG_SIZE; i++) 2289 if (state->stack[spi].slot_type[i] == STACK_MISC) { 2290 sanitize = true; 2291 break; 2292 } 2293 if (sanitize) { 2294 int *poff = &env->insn_aux_data[insn_idx].sanitize_stack_off; 2295 int soff = (-spi - 1) * BPF_REG_SIZE; 2296 2297 /* detected reuse of integer stack slot with a pointer 2298 * which means either llvm is reusing stack slot or 2299 * an attacker is trying to exploit CVE-2018-3639 2300 * (speculative store bypass) 2301 * Have to sanitize that slot with preemptive 2302 * store of zero. 2303 */ 2304 if (*poff && *poff != soff) { 2305 /* disallow programs where single insn stores 2306 * into two different stack slots, since verifier 2307 * cannot sanitize them 2308 */ 2309 verbose(env, 2310 "insn %d cannot access two stack slots fp%d and fp%d", 2311 insn_idx, *poff, soff); 2312 return -EINVAL; 2313 } 2314 *poff = soff; 2315 } 2316 } 2317 save_register_state(state, spi, reg); 2318 } else { 2319 u8 type = STACK_MISC; 2320 2321 /* regular write of data into stack destroys any spilled ptr */ 2322 state->stack[spi].spilled_ptr.type = NOT_INIT; 2323 /* Mark slots as STACK_MISC if they belonged to spilled ptr. */ 2324 if (state->stack[spi].slot_type[0] == STACK_SPILL) 2325 for (i = 0; i < BPF_REG_SIZE; i++) 2326 state->stack[spi].slot_type[i] = STACK_MISC; 2327 2328 /* only mark the slot as written if all 8 bytes were written 2329 * otherwise read propagation may incorrectly stop too soon 2330 * when stack slots are partially written. 2331 * This heuristic means that read propagation will be 2332 * conservative, since it will add reg_live_read marks 2333 * to stack slots all the way to first state when programs 2334 * writes+reads less than 8 bytes 2335 */ 2336 if (size == BPF_REG_SIZE) 2337 state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN; 2338 2339 /* when we zero initialize stack slots mark them as such */ 2340 if (reg && register_is_null(reg)) { 2341 /* backtracking doesn't work for STACK_ZERO yet. */ 2342 err = mark_chain_precision(env, value_regno); 2343 if (err) 2344 return err; 2345 type = STACK_ZERO; 2346 } 2347 2348 /* Mark slots affected by this stack write. 
*/
2349 for (i = 0; i < size; i++)
2350 state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] =
2351 type;
2352 }
2353 return 0;
2354 }
2355
2356 static int check_stack_read(struct bpf_verifier_env *env,
2357 struct bpf_func_state *reg_state /* func where register points to */,
2358 int off, int size, int value_regno)
2359 {
2360 struct bpf_verifier_state *vstate = env->cur_state;
2361 struct bpf_func_state *state = vstate->frame[vstate->curframe];
2362 int i, slot = -off - 1, spi = slot / BPF_REG_SIZE;
2363 struct bpf_reg_state *reg;
2364 u8 *stype;
2365
2366 if (reg_state->allocated_stack <= slot) {
2367 verbose(env, "invalid read from stack off %d+0 size %d\n",
2368 off, size);
2369 return -EACCES;
2370 }
2371 stype = reg_state->stack[spi].slot_type;
2372 reg = &reg_state->stack[spi].spilled_ptr;
2373
2374 if (stype[0] == STACK_SPILL) {
2375 if (size != BPF_REG_SIZE) {
2376 if (reg->type != SCALAR_VALUE) {
2377 verbose_linfo(env, env->insn_idx, "; ");
2378 verbose(env, "invalid size of register fill\n");
2379 return -EACCES;
2380 }
2381 if (value_regno >= 0) {
2382 mark_reg_unknown(env, state->regs, value_regno);
2383 state->regs[value_regno].live |= REG_LIVE_WRITTEN;
2384 }
2385 mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
2386 return 0;
2387 }
2388 for (i = 1; i < BPF_REG_SIZE; i++) {
2389 if (stype[(slot - i) % BPF_REG_SIZE] != STACK_SPILL) {
2390 verbose(env, "corrupted spill memory\n");
2391 return -EACCES;
2392 }
2393 }
2394
2395 if (value_regno >= 0) {
2396 /* restore register state from stack */
2397 state->regs[value_regno] = *reg;
2398 /* mark reg as written since spilled pointer state likely
2399 * has its liveness marks cleared by is_state_visited()
2400 * which resets stack/reg liveness for state transitions
2401 */
2402 state->regs[value_regno].live |= REG_LIVE_WRITTEN;
2403 } else if (__is_pointer_value(env->allow_ptr_leaks, reg)) {
2404 /* If value_regno==-1, the caller is asking us whether
2405 * it is acceptable to use this value as a SCALAR_VALUE
2406 * (e.g. for XADD).
2407 * We must not allow unprivileged callers to do that
2408 * with spilled pointers.
2409 */
2410 verbose(env, "leaking pointer from stack off %d\n",
2411 off);
2412 return -EACCES;
2413 }
2414 mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
2415 } else {
2416 int zeros = 0;
2417
2418 for (i = 0; i < size; i++) {
2419 if (stype[(slot - i) % BPF_REG_SIZE] == STACK_MISC)
2420 continue;
2421 if (stype[(slot - i) % BPF_REG_SIZE] == STACK_ZERO) {
2422 zeros++;
2423 continue;
2424 }
2425 verbose(env, "invalid read from stack off %d+%d size %d\n",
2426 off, i, size);
2427 return -EACCES;
2428 }
2429 mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
2430 if (value_regno >= 0) {
2431 if (zeros == size) {
2432 /* any size read into register is zero extended,
2433 * so the whole register == const_zero
2434 */
2435 __mark_reg_const_zero(&state->regs[value_regno]);
2436 /* backtracking doesn't support STACK_ZERO yet,
2437 * so mark it precise here, so that later
2438 * backtracking can stop here.
2439 * Backtracking may not need this if this register
2440 * doesn't participate in pointer adjustment.
2441 * Forward propagation of precise flag is not
2442 * necessary either. This mark is only to stop
2443 * backtracking. Any register that contributed
2444 * to const 0 was marked precise before spill.
2445 */ 2446 state->regs[value_regno].precise = true; 2447 } else { 2448 /* have read misc data from the stack */ 2449 mark_reg_unknown(env, state->regs, value_regno); 2450 } 2451 state->regs[value_regno].live |= REG_LIVE_WRITTEN; 2452 } 2453 } 2454 return 0; 2455 } 2456 2457 static int check_stack_access(struct bpf_verifier_env *env, 2458 const struct bpf_reg_state *reg, 2459 int off, int size) 2460 { 2461 /* Stack accesses must be at a fixed offset, so that we 2462 * can determine what type of data were returned. See 2463 * check_stack_read(). 2464 */ 2465 if (!tnum_is_const(reg->var_off)) { 2466 char tn_buf[48]; 2467 2468 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); 2469 verbose(env, "variable stack access var_off=%s off=%d size=%d\n", 2470 tn_buf, off, size); 2471 return -EACCES; 2472 } 2473 2474 if (off >= 0 || off < -MAX_BPF_STACK) { 2475 verbose(env, "invalid stack off=%d size=%d\n", off, size); 2476 return -EACCES; 2477 } 2478 2479 return 0; 2480 } 2481 2482 static int check_map_access_type(struct bpf_verifier_env *env, u32 regno, 2483 int off, int size, enum bpf_access_type type) 2484 { 2485 struct bpf_reg_state *regs = cur_regs(env); 2486 struct bpf_map *map = regs[regno].map_ptr; 2487 u32 cap = bpf_map_flags_to_cap(map); 2488 2489 if (type == BPF_WRITE && !(cap & BPF_MAP_CAN_WRITE)) { 2490 verbose(env, "write into map forbidden, value_size=%d off=%d size=%d\n", 2491 map->value_size, off, size); 2492 return -EACCES; 2493 } 2494 2495 if (type == BPF_READ && !(cap & BPF_MAP_CAN_READ)) { 2496 verbose(env, "read from map forbidden, value_size=%d off=%d size=%d\n", 2497 map->value_size, off, size); 2498 return -EACCES; 2499 } 2500 2501 return 0; 2502 } 2503 2504 /* check read/write into memory region (e.g., map value, ringbuf sample, etc) */ 2505 static int __check_mem_access(struct bpf_verifier_env *env, int regno, 2506 int off, int size, u32 mem_size, 2507 bool zero_size_allowed) 2508 { 2509 bool size_ok = size > 0 || (size == 0 && zero_size_allowed); 2510 struct bpf_reg_state *reg; 2511 2512 if (off >= 0 && size_ok && (u64)off + size <= mem_size) 2513 return 0; 2514 2515 reg = &cur_regs(env)[regno]; 2516 switch (reg->type) { 2517 case PTR_TO_MAP_VALUE: 2518 verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n", 2519 mem_size, off, size); 2520 break; 2521 case PTR_TO_PACKET: 2522 case PTR_TO_PACKET_META: 2523 case PTR_TO_PACKET_END: 2524 verbose(env, "invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n", 2525 off, size, regno, reg->id, off, mem_size); 2526 break; 2527 case PTR_TO_MEM: 2528 default: 2529 verbose(env, "invalid access to memory, mem_size=%u off=%d size=%d\n", 2530 mem_size, off, size); 2531 } 2532 2533 return -EACCES; 2534 } 2535 2536 /* check read/write into a memory region with possible variable offset */ 2537 static int check_mem_region_access(struct bpf_verifier_env *env, u32 regno, 2538 int off, int size, u32 mem_size, 2539 bool zero_size_allowed) 2540 { 2541 struct bpf_verifier_state *vstate = env->cur_state; 2542 struct bpf_func_state *state = vstate->frame[vstate->curframe]; 2543 struct bpf_reg_state *reg = &state->regs[regno]; 2544 int err; 2545 2546 /* We may have adjusted the register pointing to memory region, so we 2547 * need to try adding each of min_value and max_value to off 2548 * to make sure our theoretical access will be safe. 
2549 */ 2550 if (env->log.level & BPF_LOG_LEVEL) 2551 print_verifier_state(env, state); 2552 2553 /* The minimum value is only important with signed 2554 * comparisons where we can't assume the floor of a 2555 * value is 0. If we are using signed variables for our 2556 * index'es we need to make sure that whatever we use 2557 * will have a set floor within our range. 2558 */ 2559 if (reg->smin_value < 0 && 2560 (reg->smin_value == S64_MIN || 2561 (off + reg->smin_value != (s64)(s32)(off + reg->smin_value)) || 2562 reg->smin_value + off < 0)) { 2563 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n", 2564 regno); 2565 return -EACCES; 2566 } 2567 err = __check_mem_access(env, regno, reg->smin_value + off, size, 2568 mem_size, zero_size_allowed); 2569 if (err) { 2570 verbose(env, "R%d min value is outside of the allowed memory range\n", 2571 regno); 2572 return err; 2573 } 2574 2575 /* If we haven't set a max value then we need to bail since we can't be 2576 * sure we won't do bad things. 2577 * If reg->umax_value + off could overflow, treat that as unbounded too. 2578 */ 2579 if (reg->umax_value >= BPF_MAX_VAR_OFF) { 2580 verbose(env, "R%d unbounded memory access, make sure to bounds check any such access\n", 2581 regno); 2582 return -EACCES; 2583 } 2584 err = __check_mem_access(env, regno, reg->umax_value + off, size, 2585 mem_size, zero_size_allowed); 2586 if (err) { 2587 verbose(env, "R%d max value is outside of the allowed memory range\n", 2588 regno); 2589 return err; 2590 } 2591 2592 return 0; 2593 } 2594 2595 /* check read/write into a map element with possible variable offset */ 2596 static int check_map_access(struct bpf_verifier_env *env, u32 regno, 2597 int off, int size, bool zero_size_allowed) 2598 { 2599 struct bpf_verifier_state *vstate = env->cur_state; 2600 struct bpf_func_state *state = vstate->frame[vstate->curframe]; 2601 struct bpf_reg_state *reg = &state->regs[regno]; 2602 struct bpf_map *map = reg->map_ptr; 2603 int err; 2604 2605 err = check_mem_region_access(env, regno, off, size, map->value_size, 2606 zero_size_allowed); 2607 if (err) 2608 return err; 2609 2610 if (map_value_has_spin_lock(map)) { 2611 u32 lock = map->spin_lock_off; 2612 2613 /* if any part of struct bpf_spin_lock can be touched by 2614 * load/store reject this program. 2615 * To check that [x1, x2) overlaps with [y1, y2) 2616 * it is sufficient to check x1 < y2 && y1 < x2. 2617 */ 2618 if (reg->smin_value + off < lock + sizeof(struct bpf_spin_lock) && 2619 lock < reg->umax_value + off + size) { 2620 verbose(env, "bpf_spin_lock cannot be accessed directly by load/store\n"); 2621 return -EACCES; 2622 } 2623 } 2624 return err; 2625 } 2626 2627 #define MAX_PACKET_OFF 0xffff 2628 2629 static enum bpf_prog_type resolve_prog_type(struct bpf_prog *prog) 2630 { 2631 return prog->aux->linked_prog ? prog->aux->linked_prog->type 2632 : prog->type; 2633 } 2634 2635 static bool may_access_direct_pkt_data(struct bpf_verifier_env *env, 2636 const struct bpf_call_arg_meta *meta, 2637 enum bpf_access_type t) 2638 { 2639 enum bpf_prog_type prog_type = resolve_prog_type(env->prog); 2640 2641 switch (prog_type) { 2642 /* Program types only with direct read access go here! 
*/
2643 case BPF_PROG_TYPE_LWT_IN:
2644 case BPF_PROG_TYPE_LWT_OUT:
2645 case BPF_PROG_TYPE_LWT_SEG6LOCAL:
2646 case BPF_PROG_TYPE_SK_REUSEPORT:
2647 case BPF_PROG_TYPE_FLOW_DISSECTOR:
2648 case BPF_PROG_TYPE_CGROUP_SKB:
2649 if (t == BPF_WRITE)
2650 return false;
2651 /* fallthrough */
2652
2653 /* Program types with direct read + write access go here! */
2654 case BPF_PROG_TYPE_SCHED_CLS:
2655 case BPF_PROG_TYPE_SCHED_ACT:
2656 case BPF_PROG_TYPE_XDP:
2657 case BPF_PROG_TYPE_LWT_XMIT:
2658 case BPF_PROG_TYPE_SK_SKB:
2659 case BPF_PROG_TYPE_SK_MSG:
2660 if (meta)
2661 return meta->pkt_access;
2662
2663 env->seen_direct_write = true;
2664 return true;
2665
2666 case BPF_PROG_TYPE_CGROUP_SOCKOPT:
2667 if (t == BPF_WRITE)
2668 env->seen_direct_write = true;
2669
2670 return true;
2671
2672 default:
2673 return false;
2674 }
2675 }
2676
2677 static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
2678 int size, bool zero_size_allowed)
2679 {
2680 struct bpf_reg_state *regs = cur_regs(env);
2681 struct bpf_reg_state *reg = &regs[regno];
2682 int err;
2683
2684 /* We may have added a variable offset to the packet pointer; but any
2685 * reg->range we have comes after that. We are only checking the fixed
2686 * offset.
2687 */
2688
2689 /* We don't allow negative numbers, because we aren't tracking enough
2690 * detail to prove they're safe.
2691 */
2692 if (reg->smin_value < 0) {
2693 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
2694 regno);
2695 return -EACCES;
2696 }
2697 err = __check_mem_access(env, regno, off, size, reg->range,
2698 zero_size_allowed);
2699 if (err) {
2700 verbose(env, "R%d offset is outside of the packet\n", regno);
2701 return err;
2702 }
2703
2704 /* __check_mem_access has made sure "off + size - 1" is within u16.
2705 * reg->umax_value can't be bigger than MAX_PACKET_OFF which is 0xffff,
2706 * otherwise find_good_pkt_pointers would have refused to set the range
2707 * info and __check_mem_access would have rejected this pkt access.
2708 * Therefore, "off + reg->umax_value + size - 1" won't overflow u32.
2709 */
2710 env->prog->aux->max_pkt_offset =
2711 max_t(u32, env->prog->aux->max_pkt_offset,
2712 off + reg->umax_value + size - 1);
2713
2714 return err;
2715 }
2716
2717 /* check access to 'struct bpf_context' fields. Supports fixed offsets only */
2718 static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
2719 enum bpf_access_type t, enum bpf_reg_type *reg_type,
2720 u32 *btf_id)
2721 {
2722 struct bpf_insn_access_aux info = {
2723 .reg_type = *reg_type,
2724 .log = &env->log,
2725 };
2726
2727 if (env->ops->is_valid_access &&
2728 env->ops->is_valid_access(off, size, t, env->prog, &info)) {
2729 /* A non-zero info.ctx_field_size indicates that this field is a
2730 * candidate for later verifier transformation to load the whole
2731 * field and then apply a mask when accessed with a narrower
2732 * access than actual ctx access size. A zero info.ctx_field_size
2733 * will only allow for whole field access and rejects any other
2734 * type of narrower access.
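 *
 * E.g. a hypothetical 1-byte load from a 4-byte ctx field is later
 * converted into a full 4-byte load plus shift/mask, which is only
 * possible when info.ctx_field_size has been reported here.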
2735 */
2736 *reg_type = info.reg_type;
2737
2738 if (*reg_type == PTR_TO_BTF_ID || *reg_type == PTR_TO_BTF_ID_OR_NULL)
2739 *btf_id = info.btf_id;
2740 else
2741 env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
2742 /* remember the offset of last byte accessed in ctx */
2743 if (env->prog->aux->max_ctx_offset < off + size)
2744 env->prog->aux->max_ctx_offset = off + size;
2745 return 0;
2746 }
2747
2748 verbose(env, "invalid bpf_context access off=%d size=%d\n", off, size);
2749 return -EACCES;
2750 }
2751
2752 static int check_flow_keys_access(struct bpf_verifier_env *env, int off,
2753 int size)
2754 {
2755 if (size < 0 || off < 0 ||
2756 (u64)off + size > sizeof(struct bpf_flow_keys)) {
2757 verbose(env, "invalid access to flow keys off=%d size=%d\n",
2758 off, size);
2759 return -EACCES;
2760 }
2761 return 0;
2762 }
2763
2764 static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
2765 u32 regno, int off, int size,
2766 enum bpf_access_type t)
2767 {
2768 struct bpf_reg_state *regs = cur_regs(env);
2769 struct bpf_reg_state *reg = &regs[regno];
2770 struct bpf_insn_access_aux info = {};
2771 bool valid;
2772
2773 if (reg->smin_value < 0) {
2774 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
2775 regno);
2776 return -EACCES;
2777 }
2778
2779 switch (reg->type) {
2780 case PTR_TO_SOCK_COMMON:
2781 valid = bpf_sock_common_is_valid_access(off, size, t, &info);
2782 break;
2783 case PTR_TO_SOCKET:
2784 valid = bpf_sock_is_valid_access(off, size, t, &info);
2785 break;
2786 case PTR_TO_TCP_SOCK:
2787 valid = bpf_tcp_sock_is_valid_access(off, size, t, &info);
2788 break;
2789 case PTR_TO_XDP_SOCK:
2790 valid = bpf_xdp_sock_is_valid_access(off, size, t, &info);
2791 break;
2792 default:
2793 valid = false;
2794 }
2795
2796
2797 if (valid) {
2798 env->insn_aux_data[insn_idx].ctx_field_size =
2799 info.ctx_field_size;
2800 return 0;
2801 }
2802
2803 verbose(env, "R%d invalid %s access off=%d size=%d\n",
2804 regno, reg_type_str[reg->type], off, size);
2805
2806 return -EACCES;
2807 }
2808
2809 static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno)
2810 {
2811 return cur_regs(env) + regno;
2812 }
2813
2814 static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
2815 {
2816 return __is_pointer_value(env->allow_ptr_leaks, reg_state(env, regno));
2817 }
2818
2819 static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
2820 {
2821 const struct bpf_reg_state *reg = reg_state(env, regno);
2822
2823 return reg->type == PTR_TO_CTX;
2824 }
2825
2826 static bool is_sk_reg(struct bpf_verifier_env *env, int regno)
2827 {
2828 const struct bpf_reg_state *reg = reg_state(env, regno);
2829
2830 return type_is_sk_pointer(reg->type);
2831 }
2832
2833 static bool is_pkt_reg(struct bpf_verifier_env *env, int regno)
2834 {
2835 const struct bpf_reg_state *reg = reg_state(env, regno);
2836
2837 return type_is_pkt_pointer(reg->type);
2838 }
2839
2840 static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno)
2841 {
2842 const struct bpf_reg_state *reg = reg_state(env, regno);
2843
2844 /* Separate to is_ctx_reg() since we still want to allow BPF_ST here. */
2845 return reg->type == PTR_TO_FLOW_KEYS;
2846 }
2847
2848 static int check_pkt_ptr_alignment(struct bpf_verifier_env *env,
2849 const struct bpf_reg_state *reg,
2850 int off, int size, bool strict)
2851 {
2852 struct tnum reg_off;
2853 int ip_align;
2854
2855 /* Byte size accesses are always allowed.
*/ 2856 if (!strict || size == 1) 2857 return 0; 2858 2859 /* For platforms that do not have a Kconfig enabling 2860 * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the value of 2861 * NET_IP_ALIGN is universally set to '2'. And on platforms 2862 * that do set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, we get 2863 * to this code only in strict mode where we want to emulate 2864 * the NET_IP_ALIGN==2 checking. Therefore use an 2865 * unconditional IP align value of '2'. 2866 */ 2867 ip_align = 2; 2868 2869 reg_off = tnum_add(reg->var_off, tnum_const(ip_align + reg->off + off)); 2870 if (!tnum_is_aligned(reg_off, size)) { 2871 char tn_buf[48]; 2872 2873 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); 2874 verbose(env, 2875 "misaligned packet access off %d+%s+%d+%d size %d\n", 2876 ip_align, tn_buf, reg->off, off, size); 2877 return -EACCES; 2878 } 2879 2880 return 0; 2881 } 2882 2883 static int check_generic_ptr_alignment(struct bpf_verifier_env *env, 2884 const struct bpf_reg_state *reg, 2885 const char *pointer_desc, 2886 int off, int size, bool strict) 2887 { 2888 struct tnum reg_off; 2889 2890 /* Byte size accesses are always allowed. */ 2891 if (!strict || size == 1) 2892 return 0; 2893 2894 reg_off = tnum_add(reg->var_off, tnum_const(reg->off + off)); 2895 if (!tnum_is_aligned(reg_off, size)) { 2896 char tn_buf[48]; 2897 2898 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); 2899 verbose(env, "misaligned %saccess off %s+%d+%d size %d\n", 2900 pointer_desc, tn_buf, reg->off, off, size); 2901 return -EACCES; 2902 } 2903 2904 return 0; 2905 } 2906 2907 static int check_ptr_alignment(struct bpf_verifier_env *env, 2908 const struct bpf_reg_state *reg, int off, 2909 int size, bool strict_alignment_once) 2910 { 2911 bool strict = env->strict_alignment || strict_alignment_once; 2912 const char *pointer_desc = ""; 2913 2914 switch (reg->type) { 2915 case PTR_TO_PACKET: 2916 case PTR_TO_PACKET_META: 2917 /* Special case, because of NET_IP_ALIGN. Given metadata sits 2918 * right in front, treat it the very same way. 2919 */ 2920 return check_pkt_ptr_alignment(env, reg, off, size, strict); 2921 case PTR_TO_FLOW_KEYS: 2922 pointer_desc = "flow keys "; 2923 break; 2924 case PTR_TO_MAP_VALUE: 2925 pointer_desc = "value "; 2926 break; 2927 case PTR_TO_CTX: 2928 pointer_desc = "context "; 2929 break; 2930 case PTR_TO_STACK: 2931 pointer_desc = "stack "; 2932 /* The stack spill tracking logic in check_stack_write() 2933 * and check_stack_read() relies on stack accesses being 2934 * aligned. 2935 */ 2936 strict = true; 2937 break; 2938 case PTR_TO_SOCKET: 2939 pointer_desc = "sock "; 2940 break; 2941 case PTR_TO_SOCK_COMMON: 2942 pointer_desc = "sock_common "; 2943 break; 2944 case PTR_TO_TCP_SOCK: 2945 pointer_desc = "tcp_sock "; 2946 break; 2947 case PTR_TO_XDP_SOCK: 2948 pointer_desc = "xdp_sock "; 2949 break; 2950 default: 2951 break; 2952 } 2953 return check_generic_ptr_alignment(env, reg, pointer_desc, off, size, 2954 strict); 2955 } 2956 2957 static int update_stack_depth(struct bpf_verifier_env *env, 2958 const struct bpf_func_state *func, 2959 int off) 2960 { 2961 u16 stack = env->subprog_info[func->subprogno].stack_depth; 2962 2963 if (stack >= -off) 2964 return 0; 2965 2966 /* update known max for given subprogram */ 2967 env->subprog_info[func->subprogno].stack_depth = -off; 2968 return 0; 2969 } 2970 2971 /* starting from main bpf function walk all instructions of the function 2972 * and recursively walk all callees that given function can call. 2973 * Ignore jump and exit insns. 
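 * For example (hypothetically), a main function using 64 bytes of
 * stack calling a subprog that uses 448 bytes gives a combined depth
 * of 512, which is still accepted; any larger callee frame would
 * push the total past MAX_BPF_STACK and be rejected.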
2974 * Since recursion is prevented by check_cfg() this algorithm 2975 * only needs a local stack of MAX_CALL_FRAMES to remember callsites 2976 */ 2977 static int check_max_stack_depth(struct bpf_verifier_env *env) 2978 { 2979 int depth = 0, frame = 0, idx = 0, i = 0, subprog_end; 2980 struct bpf_subprog_info *subprog = env->subprog_info; 2981 struct bpf_insn *insn = env->prog->insnsi; 2982 int ret_insn[MAX_CALL_FRAMES]; 2983 int ret_prog[MAX_CALL_FRAMES]; 2984 2985 process_func: 2986 /* round up to 32-bytes, since this is granularity 2987 * of interpreter stack size 2988 */ 2989 depth += round_up(max_t(u32, subprog[idx].stack_depth, 1), 32); 2990 if (depth > MAX_BPF_STACK) { 2991 verbose(env, "combined stack size of %d calls is %d. Too large\n", 2992 frame + 1, depth); 2993 return -EACCES; 2994 } 2995 continue_func: 2996 subprog_end = subprog[idx + 1].start; 2997 for (; i < subprog_end; i++) { 2998 if (insn[i].code != (BPF_JMP | BPF_CALL)) 2999 continue; 3000 if (insn[i].src_reg != BPF_PSEUDO_CALL) 3001 continue; 3002 /* remember insn and function to return to */ 3003 ret_insn[frame] = i + 1; 3004 ret_prog[frame] = idx; 3005 3006 /* find the callee */ 3007 i = i + insn[i].imm + 1; 3008 idx = find_subprog(env, i); 3009 if (idx < 0) { 3010 WARN_ONCE(1, "verifier bug. No program starts at insn %d\n", 3011 i); 3012 return -EFAULT; 3013 } 3014 frame++; 3015 if (frame >= MAX_CALL_FRAMES) { 3016 verbose(env, "the call stack of %d frames is too deep !\n", 3017 frame); 3018 return -E2BIG; 3019 } 3020 goto process_func; 3021 } 3022 /* end of for() loop means the last insn of the 'subprog' 3023 * was reached. Doesn't matter whether it was JA or EXIT 3024 */ 3025 if (frame == 0) 3026 return 0; 3027 depth -= round_up(max_t(u32, subprog[idx].stack_depth, 1), 32); 3028 frame--; 3029 i = ret_insn[frame]; 3030 idx = ret_prog[frame]; 3031 goto continue_func; 3032 } 3033 3034 #ifndef CONFIG_BPF_JIT_ALWAYS_ON 3035 static int get_callee_stack_depth(struct bpf_verifier_env *env, 3036 const struct bpf_insn *insn, int idx) 3037 { 3038 int start = idx + insn->imm + 1, subprog; 3039 3040 subprog = find_subprog(env, start); 3041 if (subprog < 0) { 3042 WARN_ONCE(1, "verifier bug. No program starts at insn %d\n", 3043 start); 3044 return -EFAULT; 3045 } 3046 return env->subprog_info[subprog].stack_depth; 3047 } 3048 #endif 3049 3050 int check_ctx_reg(struct bpf_verifier_env *env, 3051 const struct bpf_reg_state *reg, int regno) 3052 { 3053 /* Access to ctx or passing it to a helper is only allowed in 3054 * its original, unmodified form. 
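 *
 * E.g. a hypothetical sequence
 *   r2 = r1
 *   r2 += 8
 *   r3 = *(u32 *)(r2 + 0)
 * is rejected here because the ctx pointer in r2 carries a non-zero
 * fixed offset.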
3055 */ 3056 3057 if (reg->off) { 3058 verbose(env, "dereference of modified ctx ptr R%d off=%d disallowed\n", 3059 regno, reg->off); 3060 return -EACCES; 3061 } 3062 3063 if (!tnum_is_const(reg->var_off) || reg->var_off.value) { 3064 char tn_buf[48]; 3065 3066 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); 3067 verbose(env, "variable ctx access var_off=%s disallowed\n", tn_buf); 3068 return -EACCES; 3069 } 3070 3071 return 0; 3072 } 3073 3074 static int __check_buffer_access(struct bpf_verifier_env *env, 3075 const char *buf_info, 3076 const struct bpf_reg_state *reg, 3077 int regno, int off, int size) 3078 { 3079 if (off < 0) { 3080 verbose(env, 3081 "R%d invalid %s buffer access: off=%d, size=%d\n", 3082 regno, buf_info, off, size); 3083 return -EACCES; 3084 } 3085 if (!tnum_is_const(reg->var_off) || reg->var_off.value) { 3086 char tn_buf[48]; 3087 3088 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); 3089 verbose(env, 3090 "R%d invalid variable buffer offset: off=%d, var_off=%s\n", 3091 regno, off, tn_buf); 3092 return -EACCES; 3093 } 3094 3095 return 0; 3096 } 3097 3098 static int check_tp_buffer_access(struct bpf_verifier_env *env, 3099 const struct bpf_reg_state *reg, 3100 int regno, int off, int size) 3101 { 3102 int err; 3103 3104 err = __check_buffer_access(env, "tracepoint", reg, regno, off, size); 3105 if (err) 3106 return err; 3107 3108 if (off + size > env->prog->aux->max_tp_access) 3109 env->prog->aux->max_tp_access = off + size; 3110 3111 return 0; 3112 } 3113 3114 static int check_buffer_access(struct bpf_verifier_env *env, 3115 const struct bpf_reg_state *reg, 3116 int regno, int off, int size, 3117 bool zero_size_allowed, 3118 const char *buf_info, 3119 u32 *max_access) 3120 { 3121 int err; 3122 3123 err = __check_buffer_access(env, buf_info, reg, regno, off, size); 3124 if (err) 3125 return err; 3126 3127 if (off + size > *max_access) 3128 *max_access = off + size; 3129 3130 return 0; 3131 } 3132 3133 /* BPF architecture zero extends alu32 ops into 64-bit registesr */ 3134 static void zext_32_to_64(struct bpf_reg_state *reg) 3135 { 3136 reg->var_off = tnum_subreg(reg->var_off); 3137 __reg_assign_32_into_64(reg); 3138 } 3139 3140 /* truncate register to smaller size (in bytes) 3141 * must be called with size < BPF_REG_SIZE 3142 */ 3143 static void coerce_reg_to_size(struct bpf_reg_state *reg, int size) 3144 { 3145 u64 mask; 3146 3147 /* clear high bits in bit representation */ 3148 reg->var_off = tnum_cast(reg->var_off, size); 3149 3150 /* fix arithmetic bounds */ 3151 mask = ((u64)1 << (size * 8)) - 1; 3152 if ((reg->umin_value & ~mask) == (reg->umax_value & ~mask)) { 3153 reg->umin_value &= mask; 3154 reg->umax_value &= mask; 3155 } else { 3156 reg->umin_value = 0; 3157 reg->umax_value = mask; 3158 } 3159 reg->smin_value = reg->umin_value; 3160 reg->smax_value = reg->umax_value; 3161 3162 /* If size is smaller than 32bit register the 32bit register 3163 * values are also truncated so we push 64-bit bounds into 3164 * 32-bit bounds. Above were truncated < 32-bits already. 
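 *
 * E.g. coercing a register known to be in [0x105, 0x10a] down to
 * size 1 keeps the tight bounds [0x05, 0x0a], while coercing
 * [0x00, 0x1ff] resets the bounds to the full [0x00, 0xff] range.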
3165 */ 3166 if (size >= 4) 3167 return; 3168 __reg_combine_64_into_32(reg); 3169 } 3170 3171 static bool bpf_map_is_rdonly(const struct bpf_map *map) 3172 { 3173 return (map->map_flags & BPF_F_RDONLY_PROG) && map->frozen; 3174 } 3175 3176 static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val) 3177 { 3178 void *ptr; 3179 u64 addr; 3180 int err; 3181 3182 err = map->ops->map_direct_value_addr(map, &addr, off); 3183 if (err) 3184 return err; 3185 ptr = (void *)(long)addr + off; 3186 3187 switch (size) { 3188 case sizeof(u8): 3189 *val = (u64)*(u8 *)ptr; 3190 break; 3191 case sizeof(u16): 3192 *val = (u64)*(u16 *)ptr; 3193 break; 3194 case sizeof(u32): 3195 *val = (u64)*(u32 *)ptr; 3196 break; 3197 case sizeof(u64): 3198 *val = *(u64 *)ptr; 3199 break; 3200 default: 3201 return -EINVAL; 3202 } 3203 return 0; 3204 } 3205 3206 static int check_ptr_to_btf_access(struct bpf_verifier_env *env, 3207 struct bpf_reg_state *regs, 3208 int regno, int off, int size, 3209 enum bpf_access_type atype, 3210 int value_regno) 3211 { 3212 struct bpf_reg_state *reg = regs + regno; 3213 const struct btf_type *t = btf_type_by_id(btf_vmlinux, reg->btf_id); 3214 const char *tname = btf_name_by_offset(btf_vmlinux, t->name_off); 3215 u32 btf_id; 3216 int ret; 3217 3218 if (off < 0) { 3219 verbose(env, 3220 "R%d is ptr_%s invalid negative access: off=%d\n", 3221 regno, tname, off); 3222 return -EACCES; 3223 } 3224 if (!tnum_is_const(reg->var_off) || reg->var_off.value) { 3225 char tn_buf[48]; 3226 3227 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); 3228 verbose(env, 3229 "R%d is ptr_%s invalid variable offset: off=%d, var_off=%s\n", 3230 regno, tname, off, tn_buf); 3231 return -EACCES; 3232 } 3233 3234 if (env->ops->btf_struct_access) { 3235 ret = env->ops->btf_struct_access(&env->log, t, off, size, 3236 atype, &btf_id); 3237 } else { 3238 if (atype != BPF_READ) { 3239 verbose(env, "only read is supported\n"); 3240 return -EACCES; 3241 } 3242 3243 ret = btf_struct_access(&env->log, t, off, size, atype, 3244 &btf_id); 3245 } 3246 3247 if (ret < 0) 3248 return ret; 3249 3250 if (atype == BPF_READ && value_regno >= 0) 3251 mark_btf_ld_reg(env, regs, value_regno, ret, btf_id); 3252 3253 return 0; 3254 } 3255 3256 static int check_ptr_to_map_access(struct bpf_verifier_env *env, 3257 struct bpf_reg_state *regs, 3258 int regno, int off, int size, 3259 enum bpf_access_type atype, 3260 int value_regno) 3261 { 3262 struct bpf_reg_state *reg = regs + regno; 3263 struct bpf_map *map = reg->map_ptr; 3264 const struct btf_type *t; 3265 const char *tname; 3266 u32 btf_id; 3267 int ret; 3268 3269 if (!btf_vmlinux) { 3270 verbose(env, "map_ptr access not supported without CONFIG_DEBUG_INFO_BTF\n"); 3271 return -ENOTSUPP; 3272 } 3273 3274 if (!map->ops->map_btf_id || !*map->ops->map_btf_id) { 3275 verbose(env, "map_ptr access not supported for map type %d\n", 3276 map->map_type); 3277 return -ENOTSUPP; 3278 } 3279 3280 t = btf_type_by_id(btf_vmlinux, *map->ops->map_btf_id); 3281 tname = btf_name_by_offset(btf_vmlinux, t->name_off); 3282 3283 if (!env->allow_ptr_to_map_access) { 3284 verbose(env, 3285 "%s access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n", 3286 tname); 3287 return -EPERM; 3288 } 3289 3290 if (off < 0) { 3291 verbose(env, "R%d is %s invalid negative access: off=%d\n", 3292 regno, tname, off); 3293 return -EACCES; 3294 } 3295 3296 if (atype != BPF_READ) { 3297 verbose(env, "only read from %s is supported\n", tname); 3298 return -EACCES; 3299 } 3300 3301 ret = 
btf_struct_access(&env->log, t, off, size, atype, &btf_id);
3302 if (ret < 0)
3303 return ret;
3304
3305 if (value_regno >= 0)
3306 mark_btf_ld_reg(env, regs, value_regno, ret, btf_id);
3307
3308 return 0;
3309 }
3310
3311
3312 /* check whether memory at (regno + off) is accessible for t = (read | write)
3313 * if t==write, value_regno is a register whose value is stored into memory
3314 * if t==read, value_regno is a register which will receive the value from memory
3315 * if t==write && value_regno==-1, some unknown value is stored into memory
3316 * if t==read && value_regno==-1, don't care what we read from memory
3317 */
3318 static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno,
3319 int off, int bpf_size, enum bpf_access_type t,
3320 int value_regno, bool strict_alignment_once)
3321 {
3322 struct bpf_reg_state *regs = cur_regs(env);
3323 struct bpf_reg_state *reg = regs + regno;
3324 struct bpf_func_state *state;
3325 int size, err = 0;
3326
3327 size = bpf_size_to_bytes(bpf_size);
3328 if (size < 0)
3329 return size;
3330
3331 /* alignment checks will add in reg->off themselves */
3332 err = check_ptr_alignment(env, reg, off, size, strict_alignment_once);
3333 if (err)
3334 return err;
3335
3336 /* for access checks, reg->off is just part of off */
3337 off += reg->off;
3338
3339 if (reg->type == PTR_TO_MAP_VALUE) {
3340 if (t == BPF_WRITE && value_regno >= 0 &&
3341 is_pointer_value(env, value_regno)) {
3342 verbose(env, "R%d leaks addr into map\n", value_regno);
3343 return -EACCES;
3344 }
3345 err = check_map_access_type(env, regno, off, size, t);
3346 if (err)
3347 return err;
3348 err = check_map_access(env, regno, off, size, false);
3349 if (!err && t == BPF_READ && value_regno >= 0) {
3350 struct bpf_map *map = reg->map_ptr;
3351
3352 /* if map is read-only, track its contents as scalars */
3353 if (tnum_is_const(reg->var_off) &&
3354 bpf_map_is_rdonly(map) &&
3355 map->ops->map_direct_value_addr) {
3356 int map_off = off + reg->var_off.value;
3357 u64 val = 0;
3358
3359 err = bpf_map_direct_read(map, map_off, size,
3360 &val);
3361 if (err)
3362 return err;
3363
3364 regs[value_regno].type = SCALAR_VALUE;
3365 __mark_reg_known(&regs[value_regno], val);
3366 } else {
3367 mark_reg_unknown(env, regs, value_regno);
3368 }
3369 }
3370 } else if (reg->type == PTR_TO_MEM) {
3371 if (t == BPF_WRITE && value_regno >= 0 &&
3372 is_pointer_value(env, value_regno)) {
3373 verbose(env, "R%d leaks addr into mem\n", value_regno);
3374 return -EACCES;
3375 }
3376 err = check_mem_region_access(env, regno, off, size,
3377 reg->mem_size, false);
3378 if (!err && t == BPF_READ && value_regno >= 0)
3379 mark_reg_unknown(env, regs, value_regno);
3380 } else if (reg->type == PTR_TO_CTX) {
3381 enum bpf_reg_type reg_type = SCALAR_VALUE;
3382 u32 btf_id = 0;
3383
3384 if (t == BPF_WRITE && value_regno >= 0 &&
3385 is_pointer_value(env, value_regno)) {
3386 verbose(env, "R%d leaks addr into ctx\n", value_regno);
3387 return -EACCES;
3388 }
3389
3390 err = check_ctx_reg(env, reg, regno);
3391 if (err < 0)
3392 return err;
3393
3394 err = check_ctx_access(env, insn_idx, off, size, t, &reg_type, &btf_id);
3395 if (err)
3396 verbose_linfo(env, insn_idx, "; ");
3397 if (!err && t == BPF_READ && value_regno >= 0) {
3398 /* ctx access returns either a scalar, or a
3399 * PTR_TO_PACKET[_META,_END]. In the latter
3400 * case, we know the offset is zero.
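 *
 * For instance, in a TC program a load of skb->data yields
 * PTR_TO_PACKET with off == 0, while a load of skb->len yields a
 * SCALAR_VALUE.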
3401 */ 3402 if (reg_type == SCALAR_VALUE) { 3403 mark_reg_unknown(env, regs, value_regno); 3404 } else { 3405 mark_reg_known_zero(env, regs, 3406 value_regno); 3407 if (reg_type_may_be_null(reg_type)) 3408 regs[value_regno].id = ++env->id_gen; 3409 /* A load of ctx field could have different 3410 * actual load size with the one encoded in the 3411 * insn. When the dst is PTR, it is for sure not 3412 * a sub-register. 3413 */ 3414 regs[value_regno].subreg_def = DEF_NOT_SUBREG; 3415 if (reg_type == PTR_TO_BTF_ID || 3416 reg_type == PTR_TO_BTF_ID_OR_NULL) 3417 regs[value_regno].btf_id = btf_id; 3418 } 3419 regs[value_regno].type = reg_type; 3420 } 3421 3422 } else if (reg->type == PTR_TO_STACK) { 3423 off += reg->var_off.value; 3424 err = check_stack_access(env, reg, off, size); 3425 if (err) 3426 return err; 3427 3428 state = func(env, reg); 3429 err = update_stack_depth(env, state, off); 3430 if (err) 3431 return err; 3432 3433 if (t == BPF_WRITE) 3434 err = check_stack_write(env, state, off, size, 3435 value_regno, insn_idx); 3436 else 3437 err = check_stack_read(env, state, off, size, 3438 value_regno); 3439 } else if (reg_is_pkt_pointer(reg)) { 3440 if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) { 3441 verbose(env, "cannot write into packet\n"); 3442 return -EACCES; 3443 } 3444 if (t == BPF_WRITE && value_regno >= 0 && 3445 is_pointer_value(env, value_regno)) { 3446 verbose(env, "R%d leaks addr into packet\n", 3447 value_regno); 3448 return -EACCES; 3449 } 3450 err = check_packet_access(env, regno, off, size, false); 3451 if (!err && t == BPF_READ && value_regno >= 0) 3452 mark_reg_unknown(env, regs, value_regno); 3453 } else if (reg->type == PTR_TO_FLOW_KEYS) { 3454 if (t == BPF_WRITE && value_regno >= 0 && 3455 is_pointer_value(env, value_regno)) { 3456 verbose(env, "R%d leaks addr into flow keys\n", 3457 value_regno); 3458 return -EACCES; 3459 } 3460 3461 err = check_flow_keys_access(env, off, size); 3462 if (!err && t == BPF_READ && value_regno >= 0) 3463 mark_reg_unknown(env, regs, value_regno); 3464 } else if (type_is_sk_pointer(reg->type)) { 3465 if (t == BPF_WRITE) { 3466 verbose(env, "R%d cannot write into %s\n", 3467 regno, reg_type_str[reg->type]); 3468 return -EACCES; 3469 } 3470 err = check_sock_access(env, insn_idx, regno, off, size, t); 3471 if (!err && value_regno >= 0) 3472 mark_reg_unknown(env, regs, value_regno); 3473 } else if (reg->type == PTR_TO_TP_BUFFER) { 3474 err = check_tp_buffer_access(env, reg, regno, off, size); 3475 if (!err && t == BPF_READ && value_regno >= 0) 3476 mark_reg_unknown(env, regs, value_regno); 3477 } else if (reg->type == PTR_TO_BTF_ID) { 3478 err = check_ptr_to_btf_access(env, regs, regno, off, size, t, 3479 value_regno); 3480 } else if (reg->type == CONST_PTR_TO_MAP) { 3481 err = check_ptr_to_map_access(env, regs, regno, off, size, t, 3482 value_regno); 3483 } else if (reg->type == PTR_TO_RDONLY_BUF) { 3484 if (t == BPF_WRITE) { 3485 verbose(env, "R%d cannot write into %s\n", 3486 regno, reg_type_str[reg->type]); 3487 return -EACCES; 3488 } 3489 err = check_buffer_access(env, reg, regno, off, size, false, 3490 "rdonly", 3491 &env->prog->aux->max_rdonly_access); 3492 if (!err && value_regno >= 0) 3493 mark_reg_unknown(env, regs, value_regno); 3494 } else if (reg->type == PTR_TO_RDWR_BUF) { 3495 err = check_buffer_access(env, reg, regno, off, size, false, 3496 "rdwr", 3497 &env->prog->aux->max_rdwr_access); 3498 if (!err && t == BPF_READ && value_regno >= 0) 3499 mark_reg_unknown(env, regs, value_regno); 3500 } else { 3501 
verbose(env, "R%d invalid mem access '%s'\n", regno,
3502 reg_type_str[reg->type]);
3503 return -EACCES;
3504 }
3505
3506 if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ &&
3507 regs[value_regno].type == SCALAR_VALUE) {
3508 /* b/h/w load zero-extends, mark upper bits as known 0 */
3509 coerce_reg_to_size(&regs[value_regno], size);
3510 }
3511 return err;
3512 }
3513
3514 static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn)
3515 {
3516 int err;
3517
3518 if ((BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) ||
3519 insn->imm != 0) {
3520 verbose(env, "BPF_XADD uses reserved fields\n");
3521 return -EINVAL;
3522 }
3523
3524 /* check src1 operand */
3525 err = check_reg_arg(env, insn->src_reg, SRC_OP);
3526 if (err)
3527 return err;
3528
3529 /* check src2 operand */
3530 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
3531 if (err)
3532 return err;
3533
3534 if (is_pointer_value(env, insn->src_reg)) {
3535 verbose(env, "R%d leaks addr into mem\n", insn->src_reg);
3536 return -EACCES;
3537 }
3538
3539 if (is_ctx_reg(env, insn->dst_reg) ||
3540 is_pkt_reg(env, insn->dst_reg) ||
3541 is_flow_key_reg(env, insn->dst_reg) ||
3542 is_sk_reg(env, insn->dst_reg)) {
3543 verbose(env, "BPF_XADD stores into R%d %s is not allowed\n",
3544 insn->dst_reg,
3545 reg_type_str[reg_state(env, insn->dst_reg)->type]);
3546 return -EACCES;
3547 }
3548
3549 /* check whether atomic_add can read the memory */
3550 err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
3551 BPF_SIZE(insn->code), BPF_READ, -1, true);
3552 if (err)
3553 return err;
3554
3555 /* check whether atomic_add can write into the same memory */
3556 return check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
3557 BPF_SIZE(insn->code), BPF_WRITE, -1, true);
3558 }
3559
3560 static int __check_stack_boundary(struct bpf_verifier_env *env, u32 regno,
3561 int off, int access_size,
3562 bool zero_size_allowed)
3563 {
3564 struct bpf_reg_state *reg = reg_state(env, regno);
3565
3566 if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 ||
3567 access_size < 0 || (access_size == 0 && !zero_size_allowed)) {
3568 if (tnum_is_const(reg->var_off)) {
3569 verbose(env, "invalid stack type R%d off=%d access_size=%d\n",
3570 regno, off, access_size);
3571 } else {
3572 char tn_buf[48];
3573
3574 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3575 verbose(env, "invalid stack type R%d var_off=%s access_size=%d\n",
3576 regno, tn_buf, access_size);
3577 }
3578 return -EACCES;
3579 }
3580 return 0;
3581 }
3582
3583 /* when register 'regno' is passed into function that will read 'access_size'
3584 * bytes from that pointer, make sure that it's within stack boundary
3585 * and all elements of stack are initialized.
3586 * Unlike most pointer bounds-checking functions, this one doesn't take an
3587 * 'off' argument, so it has to add in reg->off itself.
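 *
 * E.g. for a hypothetical helper taking ARG_PTR_TO_MEM plus
 * ARG_CONST_SIZE, passing fp-16 with access_size 16 requires the
 * bytes at fp-16..fp-1 to be initialized (e.g. STACK_MISC,
 * STACK_ZERO or a spilled scalar), unless the helper argument is in
 * raw mode.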
3588 */ 3589 static int check_stack_boundary(struct bpf_verifier_env *env, int regno, 3590 int access_size, bool zero_size_allowed, 3591 struct bpf_call_arg_meta *meta) 3592 { 3593 struct bpf_reg_state *reg = reg_state(env, regno); 3594 struct bpf_func_state *state = func(env, reg); 3595 int err, min_off, max_off, i, j, slot, spi; 3596 3597 if (reg->type != PTR_TO_STACK) { 3598 /* Allow zero-byte read from NULL, regardless of pointer type */ 3599 if (zero_size_allowed && access_size == 0 && 3600 register_is_null(reg)) 3601 return 0; 3602 3603 verbose(env, "R%d type=%s expected=%s\n", regno, 3604 reg_type_str[reg->type], 3605 reg_type_str[PTR_TO_STACK]); 3606 return -EACCES; 3607 } 3608 3609 if (tnum_is_const(reg->var_off)) { 3610 min_off = max_off = reg->var_off.value + reg->off; 3611 err = __check_stack_boundary(env, regno, min_off, access_size, 3612 zero_size_allowed); 3613 if (err) 3614 return err; 3615 } else { 3616 /* Variable offset is prohibited for unprivileged mode for 3617 * simplicity since it requires corresponding support in 3618 * Spectre masking for stack ALU. 3619 * See also retrieve_ptr_limit(). 3620 */ 3621 if (!env->bypass_spec_v1) { 3622 char tn_buf[48]; 3623 3624 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); 3625 verbose(env, "R%d indirect variable offset stack access prohibited for !root, var_off=%s\n", 3626 regno, tn_buf); 3627 return -EACCES; 3628 } 3629 /* Only initialized buffer on stack is allowed to be accessed 3630 * with variable offset. With uninitialized buffer it's hard to 3631 * guarantee that whole memory is marked as initialized on 3632 * helper return since specific bounds are unknown what may 3633 * cause uninitialized stack leaking. 3634 */ 3635 if (meta && meta->raw_mode) 3636 meta = NULL; 3637 3638 if (reg->smax_value >= BPF_MAX_VAR_OFF || 3639 reg->smax_value <= -BPF_MAX_VAR_OFF) { 3640 verbose(env, "R%d unbounded indirect variable offset stack access\n", 3641 regno); 3642 return -EACCES; 3643 } 3644 min_off = reg->smin_value + reg->off; 3645 max_off = reg->smax_value + reg->off; 3646 err = __check_stack_boundary(env, regno, min_off, access_size, 3647 zero_size_allowed); 3648 if (err) { 3649 verbose(env, "R%d min value is outside of stack bound\n", 3650 regno); 3651 return err; 3652 } 3653 err = __check_stack_boundary(env, regno, max_off, access_size, 3654 zero_size_allowed); 3655 if (err) { 3656 verbose(env, "R%d max value is outside of stack bound\n", 3657 regno); 3658 return err; 3659 } 3660 } 3661 3662 if (meta && meta->raw_mode) { 3663 meta->access_size = access_size; 3664 meta->regno = regno; 3665 return 0; 3666 } 3667 3668 for (i = min_off; i < max_off + access_size; i++) { 3669 u8 *stype; 3670 3671 slot = -i - 1; 3672 spi = slot / BPF_REG_SIZE; 3673 if (state->allocated_stack <= slot) 3674 goto err; 3675 stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE]; 3676 if (*stype == STACK_MISC) 3677 goto mark; 3678 if (*stype == STACK_ZERO) { 3679 /* helper can write anything into the stack */ 3680 *stype = STACK_MISC; 3681 goto mark; 3682 } 3683 3684 if (state->stack[spi].slot_type[0] == STACK_SPILL && 3685 state->stack[spi].spilled_ptr.type == PTR_TO_BTF_ID) 3686 goto mark; 3687 3688 if (state->stack[spi].slot_type[0] == STACK_SPILL && 3689 state->stack[spi].spilled_ptr.type == SCALAR_VALUE) { 3690 __mark_reg_unknown(env, &state->stack[spi].spilled_ptr); 3691 for (j = 0; j < BPF_REG_SIZE; j++) 3692 state->stack[spi].slot_type[j] = STACK_MISC; 3693 goto mark; 3694 } 3695 3696 err: 3697 if (tnum_is_const(reg->var_off)) { 3698 verbose(env, 
"invalid indirect read from stack off %d+%d size %d\n", 3699 min_off, i - min_off, access_size); 3700 } else { 3701 char tn_buf[48]; 3702 3703 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); 3704 verbose(env, "invalid indirect read from stack var_off %s+%d size %d\n", 3705 tn_buf, i - min_off, access_size); 3706 } 3707 return -EACCES; 3708 mark: 3709 /* reading any byte out of 8-byte 'spill_slot' will cause 3710 * the whole slot to be marked as 'read' 3711 */ 3712 mark_reg_read(env, &state->stack[spi].spilled_ptr, 3713 state->stack[spi].spilled_ptr.parent, 3714 REG_LIVE_READ64); 3715 } 3716 return update_stack_depth(env, state, min_off); 3717 } 3718 3719 static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, 3720 int access_size, bool zero_size_allowed, 3721 struct bpf_call_arg_meta *meta) 3722 { 3723 struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[regno]; 3724 3725 switch (reg->type) { 3726 case PTR_TO_PACKET: 3727 case PTR_TO_PACKET_META: 3728 return check_packet_access(env, regno, reg->off, access_size, 3729 zero_size_allowed); 3730 case PTR_TO_MAP_VALUE: 3731 if (check_map_access_type(env, regno, reg->off, access_size, 3732 meta && meta->raw_mode ? BPF_WRITE : 3733 BPF_READ)) 3734 return -EACCES; 3735 return check_map_access(env, regno, reg->off, access_size, 3736 zero_size_allowed); 3737 case PTR_TO_MEM: 3738 return check_mem_region_access(env, regno, reg->off, 3739 access_size, reg->mem_size, 3740 zero_size_allowed); 3741 case PTR_TO_RDONLY_BUF: 3742 if (meta && meta->raw_mode) 3743 return -EACCES; 3744 return check_buffer_access(env, reg, regno, reg->off, 3745 access_size, zero_size_allowed, 3746 "rdonly", 3747 &env->prog->aux->max_rdonly_access); 3748 case PTR_TO_RDWR_BUF: 3749 return check_buffer_access(env, reg, regno, reg->off, 3750 access_size, zero_size_allowed, 3751 "rdwr", 3752 &env->prog->aux->max_rdwr_access); 3753 default: /* scalar_value|ptr_to_stack or invalid ptr */ 3754 return check_stack_boundary(env, regno, access_size, 3755 zero_size_allowed, meta); 3756 } 3757 } 3758 3759 /* Implementation details: 3760 * bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL 3761 * Two bpf_map_lookups (even with the same key) will have different reg->id. 3762 * For traditional PTR_TO_MAP_VALUE the verifier clears reg->id after 3763 * value_or_null->value transition, since the verifier only cares about 3764 * the range of access to valid map value pointer and doesn't care about actual 3765 * address of the map element. 3766 * For maps with 'struct bpf_spin_lock' inside map value the verifier keeps 3767 * reg->id > 0 after value_or_null->value transition. By doing so 3768 * two bpf_map_lookups will be considered two different pointers that 3769 * point to different bpf_spin_locks. 3770 * The verifier allows taking only one bpf_spin_lock at a time to avoid 3771 * dead-locks. 3772 * Since only one bpf_spin_lock is allowed the checks are simpler than 3773 * reg_is_refcounted() logic. The verifier needs to remember only 3774 * one spin_lock instead of array of acquired_refs. 3775 * cur_state->active_spin_lock remembers which map value element got locked 3776 * and clears it after bpf_spin_unlock. 
3777 */ 3778 static int process_spin_lock(struct bpf_verifier_env *env, int regno, 3779 bool is_lock) 3780 { 3781 struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[regno]; 3782 struct bpf_verifier_state *cur = env->cur_state; 3783 bool is_const = tnum_is_const(reg->var_off); 3784 struct bpf_map *map = reg->map_ptr; 3785 u64 val = reg->var_off.value; 3786 3787 if (reg->type != PTR_TO_MAP_VALUE) { 3788 verbose(env, "R%d is not a pointer to map_value\n", regno); 3789 return -EINVAL; 3790 } 3791 if (!is_const) { 3792 verbose(env, 3793 "R%d doesn't have constant offset. bpf_spin_lock has to be at the constant offset\n", 3794 regno); 3795 return -EINVAL; 3796 } 3797 if (!map->btf) { 3798 verbose(env, 3799 "map '%s' has to have BTF in order to use bpf_spin_lock\n", 3800 map->name); 3801 return -EINVAL; 3802 } 3803 if (!map_value_has_spin_lock(map)) { 3804 if (map->spin_lock_off == -E2BIG) 3805 verbose(env, 3806 "map '%s' has more than one 'struct bpf_spin_lock'\n", 3807 map->name); 3808 else if (map->spin_lock_off == -ENOENT) 3809 verbose(env, 3810 "map '%s' doesn't have 'struct bpf_spin_lock'\n", 3811 map->name); 3812 else 3813 verbose(env, 3814 "map '%s' is not a struct type or bpf_spin_lock is mangled\n", 3815 map->name); 3816 return -EINVAL; 3817 } 3818 if (map->spin_lock_off != val + reg->off) { 3819 verbose(env, "off %lld doesn't point to 'struct bpf_spin_lock'\n", 3820 val + reg->off); 3821 return -EINVAL; 3822 } 3823 if (is_lock) { 3824 if (cur->active_spin_lock) { 3825 verbose(env, 3826 "Locking two bpf_spin_locks are not allowed\n"); 3827 return -EINVAL; 3828 } 3829 cur->active_spin_lock = reg->id; 3830 } else { 3831 if (!cur->active_spin_lock) { 3832 verbose(env, "bpf_spin_unlock without taking a lock\n"); 3833 return -EINVAL; 3834 } 3835 if (cur->active_spin_lock != reg->id) { 3836 verbose(env, "bpf_spin_unlock of different lock\n"); 3837 return -EINVAL; 3838 } 3839 cur->active_spin_lock = 0; 3840 } 3841 return 0; 3842 } 3843 3844 static bool arg_type_is_mem_ptr(enum bpf_arg_type type) 3845 { 3846 return type == ARG_PTR_TO_MEM || 3847 type == ARG_PTR_TO_MEM_OR_NULL || 3848 type == ARG_PTR_TO_UNINIT_MEM; 3849 } 3850 3851 static bool arg_type_is_mem_size(enum bpf_arg_type type) 3852 { 3853 return type == ARG_CONST_SIZE || 3854 type == ARG_CONST_SIZE_OR_ZERO; 3855 } 3856 3857 static bool arg_type_is_alloc_mem_ptr(enum bpf_arg_type type) 3858 { 3859 return type == ARG_PTR_TO_ALLOC_MEM || 3860 type == ARG_PTR_TO_ALLOC_MEM_OR_NULL; 3861 } 3862 3863 static bool arg_type_is_alloc_size(enum bpf_arg_type type) 3864 { 3865 return type == ARG_CONST_ALLOC_SIZE_OR_ZERO; 3866 } 3867 3868 static bool arg_type_is_int_ptr(enum bpf_arg_type type) 3869 { 3870 return type == ARG_PTR_TO_INT || 3871 type == ARG_PTR_TO_LONG; 3872 } 3873 3874 static int int_ptr_type_to_size(enum bpf_arg_type type) 3875 { 3876 if (type == ARG_PTR_TO_INT) 3877 return sizeof(u32); 3878 else if (type == ARG_PTR_TO_LONG) 3879 return sizeof(u64); 3880 3881 return -EINVAL; 3882 } 3883 3884 static int resolve_map_arg_type(struct bpf_verifier_env *env, 3885 const struct bpf_call_arg_meta *meta, 3886 enum bpf_arg_type *arg_type) 3887 { 3888 if (!meta->map_ptr) { 3889 /* kernel subsystem misconfigured verifier */ 3890 verbose(env, "invalid map_ptr to access map->type\n"); 3891 return -EACCES; 3892 } 3893 3894 switch (meta->map_ptr->map_type) { 3895 case BPF_MAP_TYPE_SOCKMAP: 3896 case BPF_MAP_TYPE_SOCKHASH: 3897 if (*arg_type == ARG_PTR_TO_MAP_VALUE) { 3898 *arg_type = ARG_PTR_TO_SOCKET; 3899 } else { 3900 verbose(env, "invalid 
arg_type for sockmap/sockhash\n"); 3901 return -EINVAL; 3902 } 3903 break; 3904 3905 default: 3906 break; 3907 } 3908 return 0; 3909 } 3910 3911 static int check_func_arg(struct bpf_verifier_env *env, u32 arg, 3912 struct bpf_call_arg_meta *meta, 3913 const struct bpf_func_proto *fn) 3914 { 3915 u32 regno = BPF_REG_1 + arg; 3916 struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[regno]; 3917 enum bpf_reg_type expected_type, type = reg->type; 3918 enum bpf_arg_type arg_type = fn->arg_type[arg]; 3919 int err = 0; 3920 3921 if (arg_type == ARG_DONTCARE) 3922 return 0; 3923 3924 err = check_reg_arg(env, regno, SRC_OP); 3925 if (err) 3926 return err; 3927 3928 if (arg_type == ARG_ANYTHING) { 3929 if (is_pointer_value(env, regno)) { 3930 verbose(env, "R%d leaks addr into helper function\n", 3931 regno); 3932 return -EACCES; 3933 } 3934 return 0; 3935 } 3936 3937 if (type_is_pkt_pointer(type) && 3938 !may_access_direct_pkt_data(env, meta, BPF_READ)) { 3939 verbose(env, "helper access to the packet is not allowed\n"); 3940 return -EACCES; 3941 } 3942 3943 if (arg_type == ARG_PTR_TO_MAP_VALUE || 3944 arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE || 3945 arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL) { 3946 err = resolve_map_arg_type(env, meta, &arg_type); 3947 if (err) 3948 return err; 3949 } 3950 3951 if (arg_type == ARG_PTR_TO_MAP_KEY || 3952 arg_type == ARG_PTR_TO_MAP_VALUE || 3953 arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE || 3954 arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL) { 3955 expected_type = PTR_TO_STACK; 3956 if (register_is_null(reg) && 3957 arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL) 3958 /* final test in check_stack_boundary() */; 3959 else if (!type_is_pkt_pointer(type) && 3960 type != PTR_TO_MAP_VALUE && 3961 type != expected_type) 3962 goto err_type; 3963 } else if (arg_type == ARG_CONST_SIZE || 3964 arg_type == ARG_CONST_SIZE_OR_ZERO || 3965 arg_type == ARG_CONST_ALLOC_SIZE_OR_ZERO) { 3966 expected_type = SCALAR_VALUE; 3967 if (type != expected_type) 3968 goto err_type; 3969 } else if (arg_type == ARG_CONST_MAP_PTR) { 3970 expected_type = CONST_PTR_TO_MAP; 3971 if (type != expected_type) 3972 goto err_type; 3973 } else if (arg_type == ARG_PTR_TO_CTX || 3974 arg_type == ARG_PTR_TO_CTX_OR_NULL) { 3975 expected_type = PTR_TO_CTX; 3976 if (!(register_is_null(reg) && 3977 arg_type == ARG_PTR_TO_CTX_OR_NULL)) { 3978 if (type != expected_type) 3979 goto err_type; 3980 err = check_ctx_reg(env, reg, regno); 3981 if (err < 0) 3982 return err; 3983 } 3984 } else if (arg_type == ARG_PTR_TO_SOCK_COMMON) { 3985 expected_type = PTR_TO_SOCK_COMMON; 3986 /* Any sk pointer can be ARG_PTR_TO_SOCK_COMMON */ 3987 if (!type_is_sk_pointer(type)) 3988 goto err_type; 3989 if (reg->ref_obj_id) { 3990 if (meta->ref_obj_id) { 3991 verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n", 3992 regno, reg->ref_obj_id, 3993 meta->ref_obj_id); 3994 return -EFAULT; 3995 } 3996 meta->ref_obj_id = reg->ref_obj_id; 3997 } 3998 } else if (arg_type == ARG_PTR_TO_SOCKET || 3999 arg_type == ARG_PTR_TO_SOCKET_OR_NULL) { 4000 expected_type = PTR_TO_SOCKET; 4001 if (!(register_is_null(reg) && 4002 arg_type == ARG_PTR_TO_SOCKET_OR_NULL)) { 4003 if (type != expected_type) 4004 goto err_type; 4005 } 4006 } else if (arg_type == ARG_PTR_TO_BTF_ID) { 4007 bool ids_match = false; 4008 4009 expected_type = PTR_TO_BTF_ID; 4010 if (type != expected_type) 4011 goto err_type; 4012 if (!fn->check_btf_id) { 4013 if (reg->btf_id != meta->btf_id) { 4014 ids_match = btf_struct_ids_match(&env->log, reg->off, reg->btf_id, 4015 
meta->btf_id); 4016 if (!ids_match) { 4017 verbose(env, "Helper has type %s got %s in R%d\n", 4018 kernel_type_name(meta->btf_id), 4019 kernel_type_name(reg->btf_id), regno); 4020 return -EACCES; 4021 } 4022 } 4023 } else if (!fn->check_btf_id(reg->btf_id, arg)) { 4024 verbose(env, "Helper does not support %s in R%d\n", 4025 kernel_type_name(reg->btf_id), regno); 4026 4027 return -EACCES; 4028 } 4029 if ((reg->off && !ids_match) || !tnum_is_const(reg->var_off) || reg->var_off.value) { 4030 verbose(env, "R%d is a pointer to in-kernel struct with non-zero offset\n", 4031 regno); 4032 return -EACCES; 4033 } 4034 } else if (arg_type == ARG_PTR_TO_SPIN_LOCK) { 4035 if (meta->func_id == BPF_FUNC_spin_lock) { 4036 if (process_spin_lock(env, regno, true)) 4037 return -EACCES; 4038 } else if (meta->func_id == BPF_FUNC_spin_unlock) { 4039 if (process_spin_lock(env, regno, false)) 4040 return -EACCES; 4041 } else { 4042 verbose(env, "verifier internal error\n"); 4043 return -EFAULT; 4044 } 4045 } else if (arg_type_is_mem_ptr(arg_type)) { 4046 expected_type = PTR_TO_STACK; 4047 /* One exception here. In case function allows for NULL to be 4048 * passed in as argument, it's a SCALAR_VALUE type. Final test 4049 * happens during stack boundary checking. 4050 */ 4051 if (register_is_null(reg) && 4052 (arg_type == ARG_PTR_TO_MEM_OR_NULL || 4053 arg_type == ARG_PTR_TO_ALLOC_MEM_OR_NULL)) 4054 /* final test in check_stack_boundary() */; 4055 else if (!type_is_pkt_pointer(type) && 4056 type != PTR_TO_MAP_VALUE && 4057 type != PTR_TO_MEM && 4058 type != PTR_TO_RDONLY_BUF && 4059 type != PTR_TO_RDWR_BUF && 4060 type != expected_type) 4061 goto err_type; 4062 meta->raw_mode = arg_type == ARG_PTR_TO_UNINIT_MEM; 4063 } else if (arg_type_is_alloc_mem_ptr(arg_type)) { 4064 expected_type = PTR_TO_MEM; 4065 if (register_is_null(reg) && 4066 arg_type == ARG_PTR_TO_ALLOC_MEM_OR_NULL) 4067 /* final test in check_stack_boundary() */; 4068 else if (type != expected_type) 4069 goto err_type; 4070 if (meta->ref_obj_id) { 4071 verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n", 4072 regno, reg->ref_obj_id, 4073 meta->ref_obj_id); 4074 return -EFAULT; 4075 } 4076 meta->ref_obj_id = reg->ref_obj_id; 4077 } else if (arg_type_is_int_ptr(arg_type)) { 4078 expected_type = PTR_TO_STACK; 4079 if (!type_is_pkt_pointer(type) && 4080 type != PTR_TO_MAP_VALUE && 4081 type != expected_type) 4082 goto err_type; 4083 } else { 4084 verbose(env, "unsupported arg_type %d\n", arg_type); 4085 return -EFAULT; 4086 } 4087 4088 if (arg_type == ARG_CONST_MAP_PTR) { 4089 /* bpf_map_xxx(map_ptr) call: remember that map_ptr */ 4090 meta->map_ptr = reg->map_ptr; 4091 } else if (arg_type == ARG_PTR_TO_MAP_KEY) { 4092 /* bpf_map_xxx(..., map_ptr, ..., key) call: 4093 * check that [key, key + map->key_size) are within 4094 * stack limits and initialized 4095 */ 4096 if (!meta->map_ptr) { 4097 /* in function declaration map_ptr must come before 4098 * map_key, so that it's verified and known before 4099 * we have to check map_key here. 
Otherwise it means 4100 * that kernel subsystem misconfigured verifier 4101 */ 4102 verbose(env, "invalid map_ptr to access map->key\n"); 4103 return -EACCES; 4104 } 4105 err = check_helper_mem_access(env, regno, 4106 meta->map_ptr->key_size, false, 4107 NULL); 4108 } else if (arg_type == ARG_PTR_TO_MAP_VALUE || 4109 (arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL && 4110 !register_is_null(reg)) || 4111 arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE) { 4112 /* bpf_map_xxx(..., map_ptr, ..., value) call: 4113 * check [value, value + map->value_size) validity 4114 */ 4115 if (!meta->map_ptr) { 4116 /* kernel subsystem misconfigured verifier */ 4117 verbose(env, "invalid map_ptr to access map->value\n"); 4118 return -EACCES; 4119 } 4120 meta->raw_mode = (arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE); 4121 err = check_helper_mem_access(env, regno, 4122 meta->map_ptr->value_size, false, 4123 meta); 4124 } else if (arg_type_is_mem_size(arg_type)) { 4125 bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO); 4126 4127 /* This is used to refine r0 return value bounds for helpers 4128 * that enforce this value as an upper bound on return values. 4129 * See do_refine_retval_range() for helpers that can refine 4130 * the return value. C type of helper is u32 so we pull register 4131 * bound from umax_value however, if negative verifier errors 4132 * out. Only upper bounds can be learned because retval is an 4133 * int type and negative retvals are allowed. 4134 */ 4135 meta->msize_max_value = reg->umax_value; 4136 4137 /* The register is SCALAR_VALUE; the access check 4138 * happens using its boundaries. 4139 */ 4140 if (!tnum_is_const(reg->var_off)) 4141 /* For unprivileged variable accesses, disable raw 4142 * mode so that the program is required to 4143 * initialize all the memory that the helper could 4144 * just partially fill up. 
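 *
 * In BPF-C terms (illustrative sketch; helper choice and the names 'src'
 * and 'some_len' are assumed), both rules show up like this:
 *
 *	char buf[64] = {};		// zero-init: with a non-const size,
 *					// unpriv progs can't use uninit stack
 *	__u32 len = some_len;		// unknown SCALAR_VALUE (hypothetical source)
 *
 *	len &= sizeof(buf) - 1;		// umax_value becomes 63 <= sizeof(buf)
 *	long n = bpf_probe_read_kernel_str(buf, len, src);
 *
 * Without the '&=' (or an equivalent 'if (len < sizeof(buf))' branch) the
 * size is unbounded and the call is rejected; with it, umax_value also feeds
 * do_refine_retval_range() to cap the helper's return value.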
4145 */ 4146 meta = NULL; 4147 4148 if (reg->smin_value < 0) { 4149 verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n", 4150 regno); 4151 return -EACCES; 4152 } 4153 4154 if (reg->umin_value == 0) { 4155 err = check_helper_mem_access(env, regno - 1, 0, 4156 zero_size_allowed, 4157 meta); 4158 if (err) 4159 return err; 4160 } 4161 4162 if (reg->umax_value >= BPF_MAX_VAR_SIZ) { 4163 verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n", 4164 regno); 4165 return -EACCES; 4166 } 4167 err = check_helper_mem_access(env, regno - 1, 4168 reg->umax_value, 4169 zero_size_allowed, meta); 4170 if (!err) 4171 err = mark_chain_precision(env, regno); 4172 } else if (arg_type_is_alloc_size(arg_type)) { 4173 if (!tnum_is_const(reg->var_off)) { 4174 verbose(env, "R%d unbounded size, use 'var &= const' or 'if (var < const)'\n", 4175 regno); 4176 return -EACCES; 4177 } 4178 meta->mem_size = reg->var_off.value; 4179 } else if (arg_type_is_int_ptr(arg_type)) { 4180 int size = int_ptr_type_to_size(arg_type); 4181 4182 err = check_helper_mem_access(env, regno, size, false, meta); 4183 if (err) 4184 return err; 4185 err = check_ptr_alignment(env, reg, 0, size, true); 4186 } 4187 4188 return err; 4189 err_type: 4190 verbose(env, "R%d type=%s expected=%s\n", regno, 4191 reg_type_str[type], reg_type_str[expected_type]); 4192 return -EACCES; 4193 } 4194 4195 static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id) 4196 { 4197 enum bpf_attach_type eatype = env->prog->expected_attach_type; 4198 enum bpf_prog_type type = resolve_prog_type(env->prog); 4199 4200 if (func_id != BPF_FUNC_map_update_elem) 4201 return false; 4202 4203 /* It's not possible to get access to a locked struct sock in these 4204 * contexts, so updating is safe. 4205 */ 4206 switch (type) { 4207 case BPF_PROG_TYPE_TRACING: 4208 if (eatype == BPF_TRACE_ITER) 4209 return true; 4210 break; 4211 case BPF_PROG_TYPE_SOCKET_FILTER: 4212 case BPF_PROG_TYPE_SCHED_CLS: 4213 case BPF_PROG_TYPE_SCHED_ACT: 4214 case BPF_PROG_TYPE_XDP: 4215 case BPF_PROG_TYPE_SK_REUSEPORT: 4216 case BPF_PROG_TYPE_FLOW_DISSECTOR: 4217 case BPF_PROG_TYPE_SK_LOOKUP: 4218 return true; 4219 default: 4220 break; 4221 } 4222 4223 verbose(env, "cannot update sockmap in this context\n"); 4224 return false; 4225 } 4226 4227 static int check_map_func_compatibility(struct bpf_verifier_env *env, 4228 struct bpf_map *map, int func_id) 4229 { 4230 if (!map) 4231 return 0; 4232 4233 /* We need a two way check, first is from map perspective ... 
*/ 4234 switch (map->map_type) { 4235 case BPF_MAP_TYPE_PROG_ARRAY: 4236 if (func_id != BPF_FUNC_tail_call) 4237 goto error; 4238 break; 4239 case BPF_MAP_TYPE_PERF_EVENT_ARRAY: 4240 if (func_id != BPF_FUNC_perf_event_read && 4241 func_id != BPF_FUNC_perf_event_output && 4242 func_id != BPF_FUNC_skb_output && 4243 func_id != BPF_FUNC_perf_event_read_value && 4244 func_id != BPF_FUNC_xdp_output) 4245 goto error; 4246 break; 4247 case BPF_MAP_TYPE_RINGBUF: 4248 if (func_id != BPF_FUNC_ringbuf_output && 4249 func_id != BPF_FUNC_ringbuf_reserve && 4250 func_id != BPF_FUNC_ringbuf_submit && 4251 func_id != BPF_FUNC_ringbuf_discard && 4252 func_id != BPF_FUNC_ringbuf_query) 4253 goto error; 4254 break; 4255 case BPF_MAP_TYPE_STACK_TRACE: 4256 if (func_id != BPF_FUNC_get_stackid) 4257 goto error; 4258 break; 4259 case BPF_MAP_TYPE_CGROUP_ARRAY: 4260 if (func_id != BPF_FUNC_skb_under_cgroup && 4261 func_id != BPF_FUNC_current_task_under_cgroup) 4262 goto error; 4263 break; 4264 case BPF_MAP_TYPE_CGROUP_STORAGE: 4265 case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE: 4266 if (func_id != BPF_FUNC_get_local_storage) 4267 goto error; 4268 break; 4269 case BPF_MAP_TYPE_DEVMAP: 4270 case BPF_MAP_TYPE_DEVMAP_HASH: 4271 if (func_id != BPF_FUNC_redirect_map && 4272 func_id != BPF_FUNC_map_lookup_elem) 4273 goto error; 4274 break; 4275 /* Restrict bpf side of cpumap and xskmap, open when use-cases 4276 * appear. 4277 */ 4278 case BPF_MAP_TYPE_CPUMAP: 4279 if (func_id != BPF_FUNC_redirect_map) 4280 goto error; 4281 break; 4282 case BPF_MAP_TYPE_XSKMAP: 4283 if (func_id != BPF_FUNC_redirect_map && 4284 func_id != BPF_FUNC_map_lookup_elem) 4285 goto error; 4286 break; 4287 case BPF_MAP_TYPE_ARRAY_OF_MAPS: 4288 case BPF_MAP_TYPE_HASH_OF_MAPS: 4289 if (func_id != BPF_FUNC_map_lookup_elem) 4290 goto error; 4291 break; 4292 case BPF_MAP_TYPE_SOCKMAP: 4293 if (func_id != BPF_FUNC_sk_redirect_map && 4294 func_id != BPF_FUNC_sock_map_update && 4295 func_id != BPF_FUNC_map_delete_elem && 4296 func_id != BPF_FUNC_msg_redirect_map && 4297 func_id != BPF_FUNC_sk_select_reuseport && 4298 func_id != BPF_FUNC_map_lookup_elem && 4299 !may_update_sockmap(env, func_id)) 4300 goto error; 4301 break; 4302 case BPF_MAP_TYPE_SOCKHASH: 4303 if (func_id != BPF_FUNC_sk_redirect_hash && 4304 func_id != BPF_FUNC_sock_hash_update && 4305 func_id != BPF_FUNC_map_delete_elem && 4306 func_id != BPF_FUNC_msg_redirect_hash && 4307 func_id != BPF_FUNC_sk_select_reuseport && 4308 func_id != BPF_FUNC_map_lookup_elem && 4309 !may_update_sockmap(env, func_id)) 4310 goto error; 4311 break; 4312 case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY: 4313 if (func_id != BPF_FUNC_sk_select_reuseport) 4314 goto error; 4315 break; 4316 case BPF_MAP_TYPE_QUEUE: 4317 case BPF_MAP_TYPE_STACK: 4318 if (func_id != BPF_FUNC_map_peek_elem && 4319 func_id != BPF_FUNC_map_pop_elem && 4320 func_id != BPF_FUNC_map_push_elem) 4321 goto error; 4322 break; 4323 case BPF_MAP_TYPE_SK_STORAGE: 4324 if (func_id != BPF_FUNC_sk_storage_get && 4325 func_id != BPF_FUNC_sk_storage_delete) 4326 goto error; 4327 break; 4328 case BPF_MAP_TYPE_INODE_STORAGE: 4329 if (func_id != BPF_FUNC_inode_storage_get && 4330 func_id != BPF_FUNC_inode_storage_delete) 4331 goto error; 4332 break; 4333 default: 4334 break; 4335 } 4336 4337 /* ... and second from the function itself. 
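 *
 * For example (illustrative BPF-C; 'q' is assumed to be a BPF_MAP_TYPE_QUEUE
 * map and 'h' a hash map, declared elsewhere):
 *
 *	__u64 v = 42;
 *
 *	bpf_map_push_elem(&q, &v, 0);	// ok: allowed in both switches
 *	// bpf_map_lookup_elem(&q, &v)  -> rejected by the map-type switch above
 *	// bpf_map_push_elem(&h, &v, 0) -> rejected by the func_id switch below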
*/ 4338 switch (func_id) { 4339 case BPF_FUNC_tail_call: 4340 if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY) 4341 goto error; 4342 if (env->subprog_cnt > 1) { 4343 verbose(env, "tail_calls are not allowed in programs with bpf-to-bpf calls\n"); 4344 return -EINVAL; 4345 } 4346 break; 4347 case BPF_FUNC_perf_event_read: 4348 case BPF_FUNC_perf_event_output: 4349 case BPF_FUNC_perf_event_read_value: 4350 case BPF_FUNC_skb_output: 4351 case BPF_FUNC_xdp_output: 4352 if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) 4353 goto error; 4354 break; 4355 case BPF_FUNC_get_stackid: 4356 if (map->map_type != BPF_MAP_TYPE_STACK_TRACE) 4357 goto error; 4358 break; 4359 case BPF_FUNC_current_task_under_cgroup: 4360 case BPF_FUNC_skb_under_cgroup: 4361 if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY) 4362 goto error; 4363 break; 4364 case BPF_FUNC_redirect_map: 4365 if (map->map_type != BPF_MAP_TYPE_DEVMAP && 4366 map->map_type != BPF_MAP_TYPE_DEVMAP_HASH && 4367 map->map_type != BPF_MAP_TYPE_CPUMAP && 4368 map->map_type != BPF_MAP_TYPE_XSKMAP) 4369 goto error; 4370 break; 4371 case BPF_FUNC_sk_redirect_map: 4372 case BPF_FUNC_msg_redirect_map: 4373 case BPF_FUNC_sock_map_update: 4374 if (map->map_type != BPF_MAP_TYPE_SOCKMAP) 4375 goto error; 4376 break; 4377 case BPF_FUNC_sk_redirect_hash: 4378 case BPF_FUNC_msg_redirect_hash: 4379 case BPF_FUNC_sock_hash_update: 4380 if (map->map_type != BPF_MAP_TYPE_SOCKHASH) 4381 goto error; 4382 break; 4383 case BPF_FUNC_get_local_storage: 4384 if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE && 4385 map->map_type != BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) 4386 goto error; 4387 break; 4388 case BPF_FUNC_sk_select_reuseport: 4389 if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY && 4390 map->map_type != BPF_MAP_TYPE_SOCKMAP && 4391 map->map_type != BPF_MAP_TYPE_SOCKHASH) 4392 goto error; 4393 break; 4394 case BPF_FUNC_map_peek_elem: 4395 case BPF_FUNC_map_pop_elem: 4396 case BPF_FUNC_map_push_elem: 4397 if (map->map_type != BPF_MAP_TYPE_QUEUE && 4398 map->map_type != BPF_MAP_TYPE_STACK) 4399 goto error; 4400 break; 4401 case BPF_FUNC_sk_storage_get: 4402 case BPF_FUNC_sk_storage_delete: 4403 if (map->map_type != BPF_MAP_TYPE_SK_STORAGE) 4404 goto error; 4405 break; 4406 case BPF_FUNC_inode_storage_get: 4407 case BPF_FUNC_inode_storage_delete: 4408 if (map->map_type != BPF_MAP_TYPE_INODE_STORAGE) 4409 goto error; 4410 break; 4411 default: 4412 break; 4413 } 4414 4415 return 0; 4416 error: 4417 verbose(env, "cannot pass map_type %d into func %s#%d\n", 4418 map->map_type, func_id_name(func_id), func_id); 4419 return -EINVAL; 4420 } 4421 4422 static bool check_raw_mode_ok(const struct bpf_func_proto *fn) 4423 { 4424 int count = 0; 4425 4426 if (fn->arg1_type == ARG_PTR_TO_UNINIT_MEM) 4427 count++; 4428 if (fn->arg2_type == ARG_PTR_TO_UNINIT_MEM) 4429 count++; 4430 if (fn->arg3_type == ARG_PTR_TO_UNINIT_MEM) 4431 count++; 4432 if (fn->arg4_type == ARG_PTR_TO_UNINIT_MEM) 4433 count++; 4434 if (fn->arg5_type == ARG_PTR_TO_UNINIT_MEM) 4435 count++; 4436 4437 /* We only support one arg being in raw mode at the moment, 4438 * which is sufficient for the helper functions we have 4439 * right now. 
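 *
 * Shape of a conforming proto (schematic sketch, not copied from a real
 * definition): exactly one uninit-mem argument, immediately followed by the
 * size that bounds it:
 *
 *	const struct bpf_func_proto example_read_proto = {
 *		.func		= example_read,			// hypothetical helper
 *		.gpl_only	= true,
 *		.ret_type	= RET_INTEGER,
 *		.arg1_type	= ARG_PTR_TO_UNINIT_MEM,	// dst may be uninitialized
 *		.arg2_type	= ARG_CONST_SIZE_OR_ZERO,	// bounds arg1
 *		.arg3_type	= ARG_ANYTHING,
 *	};
 *
 * Two ARG_PTR_TO_UNINIT_MEM args would fail this check, and a mem pointer
 * that is not followed by its size fails check_arg_pair_ok() just below.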
4440 */ 4441 return count <= 1; 4442 } 4443 4444 static bool check_args_pair_invalid(enum bpf_arg_type arg_curr, 4445 enum bpf_arg_type arg_next) 4446 { 4447 return (arg_type_is_mem_ptr(arg_curr) && 4448 !arg_type_is_mem_size(arg_next)) || 4449 (!arg_type_is_mem_ptr(arg_curr) && 4450 arg_type_is_mem_size(arg_next)); 4451 } 4452 4453 static bool check_arg_pair_ok(const struct bpf_func_proto *fn) 4454 { 4455 /* bpf_xxx(..., buf, len) call will access 'len' 4456 * bytes from memory 'buf'. Both arg types need 4457 * to be paired, so make sure there's no buggy 4458 * helper function specification. 4459 */ 4460 if (arg_type_is_mem_size(fn->arg1_type) || 4461 arg_type_is_mem_ptr(fn->arg5_type) || 4462 check_args_pair_invalid(fn->arg1_type, fn->arg2_type) || 4463 check_args_pair_invalid(fn->arg2_type, fn->arg3_type) || 4464 check_args_pair_invalid(fn->arg3_type, fn->arg4_type) || 4465 check_args_pair_invalid(fn->arg4_type, fn->arg5_type)) 4466 return false; 4467 4468 return true; 4469 } 4470 4471 static bool check_refcount_ok(const struct bpf_func_proto *fn, int func_id) 4472 { 4473 int count = 0; 4474 4475 if (arg_type_may_be_refcounted(fn->arg1_type)) 4476 count++; 4477 if (arg_type_may_be_refcounted(fn->arg2_type)) 4478 count++; 4479 if (arg_type_may_be_refcounted(fn->arg3_type)) 4480 count++; 4481 if (arg_type_may_be_refcounted(fn->arg4_type)) 4482 count++; 4483 if (arg_type_may_be_refcounted(fn->arg5_type)) 4484 count++; 4485 4486 /* A reference acquiring function cannot acquire 4487 * another refcounted ptr. 4488 */ 4489 if (may_be_acquire_function(func_id) && count) 4490 return false; 4491 4492 /* We only support one arg being unreferenced at the moment, 4493 * which is sufficient for the helper functions we have right now. 4494 */ 4495 return count <= 1; 4496 } 4497 4498 static int check_func_proto(const struct bpf_func_proto *fn, int func_id) 4499 { 4500 return check_raw_mode_ok(fn) && 4501 check_arg_pair_ok(fn) && 4502 check_refcount_ok(fn, func_id) ? 0 : -EINVAL; 4503 } 4504 4505 /* Packet data might have moved, any old PTR_TO_PACKET[_META,_END] 4506 * are now invalid, so turn them into unknown SCALAR_VALUE. 4507 */ 4508 static void __clear_all_pkt_pointers(struct bpf_verifier_env *env, 4509 struct bpf_func_state *state) 4510 { 4511 struct bpf_reg_state *regs = state->regs, *reg; 4512 int i; 4513 4514 for (i = 0; i < MAX_BPF_REG; i++) 4515 if (reg_is_pkt_pointer_any(®s[i])) 4516 mark_reg_unknown(env, regs, i); 4517 4518 bpf_for_each_spilled_reg(i, state, reg) { 4519 if (!reg) 4520 continue; 4521 if (reg_is_pkt_pointer_any(reg)) 4522 __mark_reg_unknown(env, reg); 4523 } 4524 } 4525 4526 static void clear_all_pkt_pointers(struct bpf_verifier_env *env) 4527 { 4528 struct bpf_verifier_state *vstate = env->cur_state; 4529 int i; 4530 4531 for (i = 0; i <= vstate->curframe; i++) 4532 __clear_all_pkt_pointers(env, vstate->frame[i]); 4533 } 4534 4535 static void release_reg_references(struct bpf_verifier_env *env, 4536 struct bpf_func_state *state, 4537 int ref_obj_id) 4538 { 4539 struct bpf_reg_state *regs = state->regs, *reg; 4540 int i; 4541 4542 for (i = 0; i < MAX_BPF_REG; i++) 4543 if (regs[i].ref_obj_id == ref_obj_id) 4544 mark_reg_unknown(env, regs, i); 4545 4546 bpf_for_each_spilled_reg(i, state, reg) { 4547 if (!reg) 4548 continue; 4549 if (reg->ref_obj_id == ref_obj_id) 4550 __mark_reg_unknown(env, reg); 4551 } 4552 } 4553 4554 /* The pointer with the specified id has released its reference to kernel 4555 * resources. Identify all copies of the same pointer and clear the reference. 
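 *
 * Program-side view (illustrative tc/BPF-C sketch; tuple setup elided):
 *
 *	struct bpf_sock_tuple tuple = {};
 *	struct bpf_sock *sk;
 *
 *	// ... fill tuple.ipv4 from the packet ...
 *	sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple.ipv4),
 *			       BPF_F_CURRENT_NETNS, 0);
 *	if (sk)
 *		bpf_sk_release(sk);	// every copy of sk shares one ref_obj_id,
 *					// so all of them are invalidated here
 *	return TC_ACT_OK;
 *
 * Exiting without the release leaves an entry in acquired_refs and
 * check_reference_leak() reports "Unreleased reference id=... alloc_insn=..."
 * and the program is rejected.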
4556 */ 4557 static int release_reference(struct bpf_verifier_env *env, 4558 int ref_obj_id) 4559 { 4560 struct bpf_verifier_state *vstate = env->cur_state; 4561 int err; 4562 int i; 4563 4564 err = release_reference_state(cur_func(env), ref_obj_id); 4565 if (err) 4566 return err; 4567 4568 for (i = 0; i <= vstate->curframe; i++) 4569 release_reg_references(env, vstate->frame[i], ref_obj_id); 4570 4571 return 0; 4572 } 4573 4574 static void clear_caller_saved_regs(struct bpf_verifier_env *env, 4575 struct bpf_reg_state *regs) 4576 { 4577 int i; 4578 4579 /* after the call registers r0 - r5 were scratched */ 4580 for (i = 0; i < CALLER_SAVED_REGS; i++) { 4581 mark_reg_not_init(env, regs, caller_saved[i]); 4582 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK); 4583 } 4584 } 4585 4586 static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, 4587 int *insn_idx) 4588 { 4589 struct bpf_verifier_state *state = env->cur_state; 4590 struct bpf_func_info_aux *func_info_aux; 4591 struct bpf_func_state *caller, *callee; 4592 int i, err, subprog, target_insn; 4593 bool is_global = false; 4594 4595 if (state->curframe + 1 >= MAX_CALL_FRAMES) { 4596 verbose(env, "the call stack of %d frames is too deep\n", 4597 state->curframe + 2); 4598 return -E2BIG; 4599 } 4600 4601 target_insn = *insn_idx + insn->imm; 4602 subprog = find_subprog(env, target_insn + 1); 4603 if (subprog < 0) { 4604 verbose(env, "verifier bug. No program starts at insn %d\n", 4605 target_insn + 1); 4606 return -EFAULT; 4607 } 4608 4609 caller = state->frame[state->curframe]; 4610 if (state->frame[state->curframe + 1]) { 4611 verbose(env, "verifier bug. Frame %d already allocated\n", 4612 state->curframe + 1); 4613 return -EFAULT; 4614 } 4615 4616 func_info_aux = env->prog->aux->func_info_aux; 4617 if (func_info_aux) 4618 is_global = func_info_aux[subprog].linkage == BTF_FUNC_GLOBAL; 4619 err = btf_check_func_arg_match(env, subprog, caller->regs); 4620 if (err == -EFAULT) 4621 return err; 4622 if (is_global) { 4623 if (err) { 4624 verbose(env, "Caller passes invalid args into func#%d\n", 4625 subprog); 4626 return err; 4627 } else { 4628 if (env->log.level & BPF_LOG_LEVEL) 4629 verbose(env, 4630 "Func#%d is global and valid. Skipping.\n", 4631 subprog); 4632 clear_caller_saved_regs(env, caller->regs); 4633 4634 /* All global functions return SCALAR_VALUE */ 4635 mark_reg_unknown(env, caller->regs, BPF_REG_0); 4636 4637 /* continue with next insn after call */ 4638 return 0; 4639 } 4640 } 4641 4642 callee = kzalloc(sizeof(*callee), GFP_KERNEL); 4643 if (!callee) 4644 return -ENOMEM; 4645 state->frame[state->curframe + 1] = callee; 4646 4647 /* callee cannot access r0, r6 - r9 for reading and has to write 4648 * into its own stack before reading from it. 4649 * callee can read/write into caller's stack 4650 */ 4651 init_func_state(env, callee, 4652 /* remember the callsite, it will be used by bpf_exit */ 4653 *insn_idx /* callsite */, 4654 state->curframe + 1 /* frameno within this callchain */, 4655 subprog /* subprog number within this prog */); 4656 4657 /* Transfer references to the callee */ 4658 err = transfer_reference_state(callee, caller); 4659 if (err) 4660 return err; 4661 4662 /* copy r1 - r5 args that callee can access. 
The copy includes parent 4663 * pointers, which connects us up to the liveness chain 4664 */ 4665 for (i = BPF_REG_1; i <= BPF_REG_5; i++) 4666 callee->regs[i] = caller->regs[i]; 4667 4668 clear_caller_saved_regs(env, caller->regs); 4669 4670 /* only increment it after check_reg_arg() finished */ 4671 state->curframe++; 4672 4673 /* and go analyze first insn of the callee */ 4674 *insn_idx = target_insn; 4675 4676 if (env->log.level & BPF_LOG_LEVEL) { 4677 verbose(env, "caller:\n"); 4678 print_verifier_state(env, caller); 4679 verbose(env, "callee:\n"); 4680 print_verifier_state(env, callee); 4681 } 4682 return 0; 4683 } 4684 4685 static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) 4686 { 4687 struct bpf_verifier_state *state = env->cur_state; 4688 struct bpf_func_state *caller, *callee; 4689 struct bpf_reg_state *r0; 4690 int err; 4691 4692 callee = state->frame[state->curframe]; 4693 r0 = &callee->regs[BPF_REG_0]; 4694 if (r0->type == PTR_TO_STACK) { 4695 /* technically it's ok to return caller's stack pointer 4696 * (or caller's caller's pointer) back to the caller, 4697 * since these pointers are valid. Only current stack 4698 * pointer will be invalid as soon as function exits, 4699 * but let's be conservative 4700 */ 4701 verbose(env, "cannot return stack pointer to the caller\n"); 4702 return -EINVAL; 4703 } 4704 4705 state->curframe--; 4706 caller = state->frame[state->curframe]; 4707 /* return to the caller whatever r0 had in the callee */ 4708 caller->regs[BPF_REG_0] = *r0; 4709 4710 /* Transfer references to the caller */ 4711 err = transfer_reference_state(caller, callee); 4712 if (err) 4713 return err; 4714 4715 *insn_idx = callee->callsite + 1; 4716 if (env->log.level & BPF_LOG_LEVEL) { 4717 verbose(env, "returning from callee:\n"); 4718 print_verifier_state(env, callee); 4719 verbose(env, "to caller at %d:\n", *insn_idx); 4720 print_verifier_state(env, caller); 4721 } 4722 /* clear everything in the callee */ 4723 free_func_state(callee); 4724 state->frame[state->curframe + 1] = NULL; 4725 return 0; 4726 } 4727 4728 static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type, 4729 int func_id, 4730 struct bpf_call_arg_meta *meta) 4731 { 4732 struct bpf_reg_state *ret_reg = ®s[BPF_REG_0]; 4733 4734 if (ret_type != RET_INTEGER || 4735 (func_id != BPF_FUNC_get_stack && 4736 func_id != BPF_FUNC_probe_read_str && 4737 func_id != BPF_FUNC_probe_read_kernel_str && 4738 func_id != BPF_FUNC_probe_read_user_str)) 4739 return; 4740 4741 ret_reg->smax_value = meta->msize_max_value; 4742 ret_reg->s32_max_value = meta->msize_max_value; 4743 __reg_deduce_bounds(ret_reg); 4744 __reg_bound_offset(ret_reg); 4745 __update_reg_bounds(ret_reg); 4746 } 4747 4748 static int 4749 record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta, 4750 int func_id, int insn_idx) 4751 { 4752 struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx]; 4753 struct bpf_map *map = meta->map_ptr; 4754 4755 if (func_id != BPF_FUNC_tail_call && 4756 func_id != BPF_FUNC_map_lookup_elem && 4757 func_id != BPF_FUNC_map_update_elem && 4758 func_id != BPF_FUNC_map_delete_elem && 4759 func_id != BPF_FUNC_map_push_elem && 4760 func_id != BPF_FUNC_map_pop_elem && 4761 func_id != BPF_FUNC_map_peek_elem) 4762 return 0; 4763 4764 if (map == NULL) { 4765 verbose(env, "kernel subsystem misconfigured verifier\n"); 4766 return -EINVAL; 4767 } 4768 4769 /* In case of read-only, some additional restrictions 4770 * need to be applied in order to prevent altering the 4771 * 
state of the map from program side. 4772 */ 4773 if ((map->map_flags & BPF_F_RDONLY_PROG) && 4774 (func_id == BPF_FUNC_map_delete_elem || 4775 func_id == BPF_FUNC_map_update_elem || 4776 func_id == BPF_FUNC_map_push_elem || 4777 func_id == BPF_FUNC_map_pop_elem)) { 4778 verbose(env, "write into map forbidden\n"); 4779 return -EACCES; 4780 } 4781 4782 if (!BPF_MAP_PTR(aux->map_ptr_state)) 4783 bpf_map_ptr_store(aux, meta->map_ptr, 4784 !meta->map_ptr->bypass_spec_v1); 4785 else if (BPF_MAP_PTR(aux->map_ptr_state) != meta->map_ptr) 4786 bpf_map_ptr_store(aux, BPF_MAP_PTR_POISON, 4787 !meta->map_ptr->bypass_spec_v1); 4788 return 0; 4789 } 4790 4791 static int 4792 record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta, 4793 int func_id, int insn_idx) 4794 { 4795 struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx]; 4796 struct bpf_reg_state *regs = cur_regs(env), *reg; 4797 struct bpf_map *map = meta->map_ptr; 4798 struct tnum range; 4799 u64 val; 4800 int err; 4801 4802 if (func_id != BPF_FUNC_tail_call) 4803 return 0; 4804 if (!map || map->map_type != BPF_MAP_TYPE_PROG_ARRAY) { 4805 verbose(env, "kernel subsystem misconfigured verifier\n"); 4806 return -EINVAL; 4807 } 4808 4809 range = tnum_range(0, map->max_entries - 1); 4810 reg = ®s[BPF_REG_3]; 4811 4812 if (!register_is_const(reg) || !tnum_in(range, reg->var_off)) { 4813 bpf_map_key_store(aux, BPF_MAP_KEY_POISON); 4814 return 0; 4815 } 4816 4817 err = mark_chain_precision(env, BPF_REG_3); 4818 if (err) 4819 return err; 4820 4821 val = reg->var_off.value; 4822 if (bpf_map_key_unseen(aux)) 4823 bpf_map_key_store(aux, val); 4824 else if (!bpf_map_key_poisoned(aux) && 4825 bpf_map_key_immediate(aux) != val) 4826 bpf_map_key_store(aux, BPF_MAP_KEY_POISON); 4827 return 0; 4828 } 4829 4830 static int check_reference_leak(struct bpf_verifier_env *env) 4831 { 4832 struct bpf_func_state *state = cur_func(env); 4833 int i; 4834 4835 for (i = 0; i < state->acquired_refs; i++) { 4836 verbose(env, "Unreleased reference id=%d alloc_insn=%d\n", 4837 state->refs[i].id, state->refs[i].insn_idx); 4838 } 4839 return state->acquired_refs ? -EINVAL : 0; 4840 } 4841 4842 static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn_idx) 4843 { 4844 const struct bpf_func_proto *fn = NULL; 4845 struct bpf_reg_state *regs; 4846 struct bpf_call_arg_meta meta; 4847 bool changes_data; 4848 int i, err; 4849 4850 /* find function prototype */ 4851 if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) { 4852 verbose(env, "invalid func %s#%d\n", func_id_name(func_id), 4853 func_id); 4854 return -EINVAL; 4855 } 4856 4857 if (env->ops->get_func_proto) 4858 fn = env->ops->get_func_proto(func_id, env->prog); 4859 if (!fn) { 4860 verbose(env, "unknown func %s#%d\n", func_id_name(func_id), 4861 func_id); 4862 return -EINVAL; 4863 } 4864 4865 /* eBPF programs must be GPL compatible to use GPL-ed functions */ 4866 if (!env->prog->gpl_compatible && fn->gpl_only) { 4867 verbose(env, "cannot call GPL-restricted function from non-GPL compatible program\n"); 4868 return -EINVAL; 4869 } 4870 4871 if (fn->allowed && !fn->allowed(env->prog)) { 4872 verbose(env, "helper call is not allowed in probe\n"); 4873 return -EINVAL; 4874 } 4875 4876 /* With LD_ABS/IND some JITs save/restore skb from r1. 
 */
4877	changes_data = bpf_helper_changes_pkt_data(fn->func);
4878	if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) {
4879		verbose(env, "kernel subsystem misconfigured func %s#%d: r1 != ctx\n",
4880			func_id_name(func_id), func_id);
4881		return -EINVAL;
4882	}
4883
4884	memset(&meta, 0, sizeof(meta));
4885	meta.pkt_access = fn->pkt_access;
4886
4887	err = check_func_proto(fn, func_id);
4888	if (err) {
4889		verbose(env, "kernel subsystem misconfigured func %s#%d\n",
4890			func_id_name(func_id), func_id);
4891		return err;
4892	}
4893
4894	meta.func_id = func_id;
4895	/* check args */
4896	for (i = 0; i < 5; i++) {
4897		if (!fn->check_btf_id) {
4898			err = btf_resolve_helper_id(&env->log, fn, i);
4899			if (err > 0)
4900				meta.btf_id = err;
4901		}
4902		err = check_func_arg(env, i, &meta, fn);
4903		if (err)
4904			return err;
4905	}
4906
4907	err = record_func_map(env, &meta, func_id, insn_idx);
4908	if (err)
4909		return err;
4910
4911	err = record_func_key(env, &meta, func_id, insn_idx);
4912	if (err)
4913		return err;
4914
4915	/* Mark slots with STACK_MISC in case of raw mode, stack offset
4916	 * is inferred from register state.
4917	 */
4918	for (i = 0; i < meta.access_size; i++) {
4919		err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B,
4920				       BPF_WRITE, -1, false);
4921		if (err)
4922			return err;
4923	}
4924
4925	if (func_id == BPF_FUNC_tail_call) {
4926		err = check_reference_leak(env);
4927		if (err) {
4928			verbose(env, "tail_call would lead to reference leak\n");
4929			return err;
4930		}
4931	} else if (is_release_function(func_id)) {
4932		err = release_reference(env, meta.ref_obj_id);
4933		if (err) {
4934			verbose(env, "func %s#%d reference has not been acquired before\n",
4935				func_id_name(func_id), func_id);
4936			return err;
4937		}
4938	}
4939
4940	regs = cur_regs(env);
4941
4942	/* check that flags argument in get_local_storage(map, flags) is 0,
4943	 * this is required because get_local_storage() can't return an error.
4944	 */
4945	if (func_id == BPF_FUNC_get_local_storage &&
4946	    !register_is_null(&regs[BPF_REG_2])) {
4947		verbose(env, "get_local_storage() doesn't support non-zero flags\n");
4948		return -EINVAL;
4949	}
4950
4951	/* reset caller saved regs */
4952	for (i = 0; i < CALLER_SAVED_REGS; i++) {
4953		mark_reg_not_init(env, regs, caller_saved[i]);
4954		check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
4955	}
4956
4957	/* helper call returns 64-bit value.
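 *
 * In insn form (illustrative), the clobbering above plus this 64-bit result
 * is why values needed after a call must sit in R6-R9 or on the stack:
 *
 *	BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),	// save ctx; R1-R5 die at the call
 *	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ktime_get_ns),
 *	// now R0 holds the full 64-bit result (no subreg), R1-R5 are NOT_INIT,
 *	// R6 still holds ctx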
*/ 4958 regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG; 4959 4960 /* update return register (already marked as written above) */ 4961 if (fn->ret_type == RET_INTEGER) { 4962 /* sets type to SCALAR_VALUE */ 4963 mark_reg_unknown(env, regs, BPF_REG_0); 4964 } else if (fn->ret_type == RET_VOID) { 4965 regs[BPF_REG_0].type = NOT_INIT; 4966 } else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL || 4967 fn->ret_type == RET_PTR_TO_MAP_VALUE) { 4968 /* There is no offset yet applied, variable or fixed */ 4969 mark_reg_known_zero(env, regs, BPF_REG_0); 4970 /* remember map_ptr, so that check_map_access() 4971 * can check 'value_size' boundary of memory access 4972 * to map element returned from bpf_map_lookup_elem() 4973 */ 4974 if (meta.map_ptr == NULL) { 4975 verbose(env, 4976 "kernel subsystem misconfigured verifier\n"); 4977 return -EINVAL; 4978 } 4979 regs[BPF_REG_0].map_ptr = meta.map_ptr; 4980 if (fn->ret_type == RET_PTR_TO_MAP_VALUE) { 4981 regs[BPF_REG_0].type = PTR_TO_MAP_VALUE; 4982 if (map_value_has_spin_lock(meta.map_ptr)) 4983 regs[BPF_REG_0].id = ++env->id_gen; 4984 } else { 4985 regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL; 4986 regs[BPF_REG_0].id = ++env->id_gen; 4987 } 4988 } else if (fn->ret_type == RET_PTR_TO_SOCKET_OR_NULL) { 4989 mark_reg_known_zero(env, regs, BPF_REG_0); 4990 regs[BPF_REG_0].type = PTR_TO_SOCKET_OR_NULL; 4991 regs[BPF_REG_0].id = ++env->id_gen; 4992 } else if (fn->ret_type == RET_PTR_TO_SOCK_COMMON_OR_NULL) { 4993 mark_reg_known_zero(env, regs, BPF_REG_0); 4994 regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON_OR_NULL; 4995 regs[BPF_REG_0].id = ++env->id_gen; 4996 } else if (fn->ret_type == RET_PTR_TO_TCP_SOCK_OR_NULL) { 4997 mark_reg_known_zero(env, regs, BPF_REG_0); 4998 regs[BPF_REG_0].type = PTR_TO_TCP_SOCK_OR_NULL; 4999 regs[BPF_REG_0].id = ++env->id_gen; 5000 } else if (fn->ret_type == RET_PTR_TO_ALLOC_MEM_OR_NULL) { 5001 mark_reg_known_zero(env, regs, BPF_REG_0); 5002 regs[BPF_REG_0].type = PTR_TO_MEM_OR_NULL; 5003 regs[BPF_REG_0].id = ++env->id_gen; 5004 regs[BPF_REG_0].mem_size = meta.mem_size; 5005 } else if (fn->ret_type == RET_PTR_TO_BTF_ID_OR_NULL) { 5006 int ret_btf_id; 5007 5008 mark_reg_known_zero(env, regs, BPF_REG_0); 5009 regs[BPF_REG_0].type = PTR_TO_BTF_ID_OR_NULL; 5010 ret_btf_id = *fn->ret_btf_id; 5011 if (ret_btf_id == 0) { 5012 verbose(env, "invalid return type %d of func %s#%d\n", 5013 fn->ret_type, func_id_name(func_id), func_id); 5014 return -EINVAL; 5015 } 5016 regs[BPF_REG_0].btf_id = ret_btf_id; 5017 } else { 5018 verbose(env, "unknown return type %d of func %s#%d\n", 5019 fn->ret_type, func_id_name(func_id), func_id); 5020 return -EINVAL; 5021 } 5022 5023 if (is_ptr_cast_function(func_id)) { 5024 /* For release_reference() */ 5025 regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id; 5026 } else if (is_acquire_function(func_id, meta.map_ptr)) { 5027 int id = acquire_reference_state(env, insn_idx); 5028 5029 if (id < 0) 5030 return id; 5031 /* For mark_ptr_or_null_reg() */ 5032 regs[BPF_REG_0].id = id; 5033 /* For release_reference() */ 5034 regs[BPF_REG_0].ref_obj_id = id; 5035 } 5036 5037 do_refine_retval_range(regs, fn->ret_type, func_id, &meta); 5038 5039 err = check_map_func_compatibility(env, meta.map_ptr, func_id); 5040 if (err) 5041 return err; 5042 5043 if ((func_id == BPF_FUNC_get_stack || 5044 func_id == BPF_FUNC_get_task_stack) && 5045 !env->prog->has_callchain_buf) { 5046 const char *err_str; 5047 5048 #ifdef CONFIG_PERF_EVENTS 5049 err = get_callchain_buffers(sysctl_perf_event_max_stack); 5050 err_str = "cannot get callchain 
buffer for func %s#%d\n";
5051 #else
5052	err = -ENOTSUPP;
5053	err_str = "func %s#%d not supported without CONFIG_PERF_EVENTS\n";
5054 #endif
5055	if (err) {
5056		verbose(env, err_str, func_id_name(func_id), func_id);
5057		return err;
5058	}
5059
5060	env->prog->has_callchain_buf = true;
5061	}
5062
5063	if (func_id == BPF_FUNC_get_stackid || func_id == BPF_FUNC_get_stack)
5064		env->prog->call_get_stack = true;
5065
5066	if (changes_data)
5067		clear_all_pkt_pointers(env);
5068	return 0;
5069 }
5070
5071 static bool signed_add_overflows(s64 a, s64 b)
5072 {
5073	/* Do the add in u64, where overflow is well-defined */
5074	s64 res = (s64)((u64)a + (u64)b);
5075
5076	if (b < 0)
5077		return res > a;
5078	return res < a;
5079 }
5080
5081 static bool signed_add32_overflows(s32 a, s32 b)
5082 {
5083	/* Do the add in u32, where overflow is well-defined */
5084	s32 res = (s32)((u32)a + (u32)b);
5085
5086	if (b < 0)
5087		return res > a;
5088	return res < a;
5089 }
5090
5091 static bool signed_sub_overflows(s64 a, s64 b)
5092 {
5093	/* Do the sub in u64, where overflow is well-defined */
5094	s64 res = (s64)((u64)a - (u64)b);
5095
5096	if (b < 0)
5097		return res < a;
5098	return res > a;
5099 }
5100
5101 static bool signed_sub32_overflows(s32 a, s32 b)
5102 {
5103	/* Do the sub in u32, where overflow is well-defined */
5104	s32 res = (s32)((u32)a - (u32)b);
5105
5106	if (b < 0)
5107		return res < a;
5108	return res > a;
5109 }
5110
5111 static bool check_reg_sane_offset(struct bpf_verifier_env *env,
5112				  const struct bpf_reg_state *reg,
5113				  enum bpf_reg_type type)
5114 {
5115	bool known = tnum_is_const(reg->var_off);
5116	s64 val = reg->var_off.value;
5117	s64 smin = reg->smin_value;
5118
5119	if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
5120		verbose(env, "math between %s pointer and %lld is not allowed\n",
5121			reg_type_str[type], val);
5122		return false;
5123	}
5124
5125	if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) {
5126		verbose(env, "%s pointer offset %d is not allowed\n",
5127			reg_type_str[type], reg->off);
5128		return false;
5129	}
5130
5131	if (smin == S64_MIN) {
5132		verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n",
5133			reg_type_str[type]);
5134		return false;
5135	}
5136
5137	if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
5138		verbose(env, "value %lld makes %s pointer be out of bounds\n",
5139			smin, reg_type_str[type]);
5140		return false;
5141	}
5142
5143	return true;
5144 }
5145
5146 static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env)
5147 {
5148	return &env->insn_aux_data[env->insn_idx];
5149 }
5150
5151 static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
5152			      u32 *ptr_limit, u8 opcode, bool off_is_neg)
5153 {
5154	bool mask_to_left = (opcode == BPF_ADD && off_is_neg) ||
5155			    (opcode == BPF_SUB && !off_is_neg);
5156	u32 off;
5157
5158	switch (ptr_reg->type) {
5159	case PTR_TO_STACK:
5160		/* Indirect variable offset stack access is prohibited in
5161		 * unprivileged mode so it's not handled here.
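		 *
		 * Worked example for the constant-offset case handled below
		 * (illustrative numbers): for a stack pointer with off = -16
		 * and MAX_BPF_STACK = 512,
		 *
		 *	mask_to_left:  alu_limit = MAX_BPF_STACK + off = 512 - 16 = 496
		 *	!mask_to_left: alu_limit = -off                =            16
		 *
		 * i.e. at most 496 bytes of room toward the bottom of the frame
		 * and 16 bytes back up toward the frame pointer;
		 * fixup_bpf_calls() uses the recorded alu_limit to emit the
		 * masking that clamps speculative offsets to this range.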
5162 */ 5163 off = ptr_reg->off + ptr_reg->var_off.value; 5164 if (mask_to_left) 5165 *ptr_limit = MAX_BPF_STACK + off; 5166 else 5167 *ptr_limit = -off; 5168 return 0; 5169 case PTR_TO_MAP_VALUE: 5170 if (mask_to_left) { 5171 *ptr_limit = ptr_reg->umax_value + ptr_reg->off; 5172 } else { 5173 off = ptr_reg->smin_value + ptr_reg->off; 5174 *ptr_limit = ptr_reg->map_ptr->value_size - off; 5175 } 5176 return 0; 5177 default: 5178 return -EINVAL; 5179 } 5180 } 5181 5182 static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env, 5183 const struct bpf_insn *insn) 5184 { 5185 return env->bypass_spec_v1 || BPF_SRC(insn->code) == BPF_K; 5186 } 5187 5188 static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux, 5189 u32 alu_state, u32 alu_limit) 5190 { 5191 /* If we arrived here from different branches with different 5192 * state or limits to sanitize, then this won't work. 5193 */ 5194 if (aux->alu_state && 5195 (aux->alu_state != alu_state || 5196 aux->alu_limit != alu_limit)) 5197 return -EACCES; 5198 5199 /* Corresponding fixup done in fixup_bpf_calls(). */ 5200 aux->alu_state = alu_state; 5201 aux->alu_limit = alu_limit; 5202 return 0; 5203 } 5204 5205 static int sanitize_val_alu(struct bpf_verifier_env *env, 5206 struct bpf_insn *insn) 5207 { 5208 struct bpf_insn_aux_data *aux = cur_aux(env); 5209 5210 if (can_skip_alu_sanitation(env, insn)) 5211 return 0; 5212 5213 return update_alu_sanitation_state(aux, BPF_ALU_NON_POINTER, 0); 5214 } 5215 5216 static int sanitize_ptr_alu(struct bpf_verifier_env *env, 5217 struct bpf_insn *insn, 5218 const struct bpf_reg_state *ptr_reg, 5219 struct bpf_reg_state *dst_reg, 5220 bool off_is_neg) 5221 { 5222 struct bpf_verifier_state *vstate = env->cur_state; 5223 struct bpf_insn_aux_data *aux = cur_aux(env); 5224 bool ptr_is_dst_reg = ptr_reg == dst_reg; 5225 u8 opcode = BPF_OP(insn->code); 5226 u32 alu_state, alu_limit; 5227 struct bpf_reg_state tmp; 5228 bool ret; 5229 5230 if (can_skip_alu_sanitation(env, insn)) 5231 return 0; 5232 5233 /* We already marked aux for masking from non-speculative 5234 * paths, thus we got here in the first place. We only care 5235 * to explore bad access from here. 5236 */ 5237 if (vstate->speculative) 5238 goto do_sim; 5239 5240 alu_state = off_is_neg ? BPF_ALU_NEG_VALUE : 0; 5241 alu_state |= ptr_is_dst_reg ? 5242 BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST; 5243 5244 if (retrieve_ptr_limit(ptr_reg, &alu_limit, opcode, off_is_neg)) 5245 return 0; 5246 if (update_alu_sanitation_state(aux, alu_state, alu_limit)) 5247 return -EACCES; 5248 do_sim: 5249 /* Simulate and find potential out-of-bounds access under 5250 * speculative execution from truncation as a result of 5251 * masking when off was not within expected range. If off 5252 * sits in dst, then we temporarily need to move ptr there 5253 * to simulate dst (== 0) +/-= ptr. Needed, for example, 5254 * for cases where we use K-based arithmetic in one direction 5255 * and truncated reg-based in the other in order to explore 5256 * bad access. 5257 */ 5258 if (!ptr_is_dst_reg) { 5259 tmp = *dst_reg; 5260 *dst_reg = *ptr_reg; 5261 } 5262 ret = push_stack(env, env->insn_idx + 1, env->insn_idx, true); 5263 if (!ptr_is_dst_reg && ret) 5264 *dst_reg = tmp; 5265 return !ret ? -EFAULT : 0; 5266 } 5267 5268 /* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off. 5269 * Caller should also handle BPF_MOV case separately. 5270 * If we return -EACCES, caller may want to try again treating pointer as a 5271 * scalar. 
So we only emit a diagnostic if !env->allow_ptr_leaks.
5272  */
5273 static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
5274				   struct bpf_insn *insn,
5275				   const struct bpf_reg_state *ptr_reg,
5276				   const struct bpf_reg_state *off_reg)
5277 {
5278	struct bpf_verifier_state *vstate = env->cur_state;
5279	struct bpf_func_state *state = vstate->frame[vstate->curframe];
5280	struct bpf_reg_state *regs = state->regs, *dst_reg;
5281	bool known = tnum_is_const(off_reg->var_off);
5282	s64 smin_val = off_reg->smin_value, smax_val = off_reg->smax_value,
5283	    smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value;
5284	u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value,
5285	    umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value;
5286	u32 dst = insn->dst_reg, src = insn->src_reg;
5287	u8 opcode = BPF_OP(insn->code);
5288	int ret;
5289
5290	dst_reg = &regs[dst];
5291
5292	if ((known && (smin_val != smax_val || umin_val != umax_val)) ||
5293	    smin_val > smax_val || umin_val > umax_val) {
5294		/* Taint dst register if offset had invalid bounds derived from
5295		 * e.g. dead branches.
5296		 */
5297		__mark_reg_unknown(env, dst_reg);
5298		return 0;
5299	}
5300
5301	if (BPF_CLASS(insn->code) != BPF_ALU64) {
5302		/* 32-bit ALU ops on pointers produce (meaningless) scalars */
5303		if (opcode == BPF_SUB && env->allow_ptr_leaks) {
5304			__mark_reg_unknown(env, dst_reg);
5305			return 0;
5306		}
5307
5308		verbose(env,
5309			"R%d 32-bit pointer arithmetic prohibited\n",
5310			dst);
5311		return -EACCES;
5312	}
5313
5314	switch (ptr_reg->type) {
5315	case PTR_TO_MAP_VALUE_OR_NULL:
5316		verbose(env, "R%d pointer arithmetic on %s prohibited, null-check it first\n",
5317			dst, reg_type_str[ptr_reg->type]);
5318		return -EACCES;
5319	case CONST_PTR_TO_MAP:
5320		/* smin_val represents the known value */
5321		if (known && smin_val == 0 && opcode == BPF_ADD)
5322			break;
5323		/* fall-through */
5324	case PTR_TO_PACKET_END:
5325	case PTR_TO_SOCKET:
5326	case PTR_TO_SOCKET_OR_NULL:
5327	case PTR_TO_SOCK_COMMON:
5328	case PTR_TO_SOCK_COMMON_OR_NULL:
5329	case PTR_TO_TCP_SOCK:
5330	case PTR_TO_TCP_SOCK_OR_NULL:
5331	case PTR_TO_XDP_SOCK:
5332		verbose(env, "R%d pointer arithmetic on %s prohibited\n",
5333			dst, reg_type_str[ptr_reg->type]);
5334		return -EACCES;
5335	case PTR_TO_MAP_VALUE:
5336		if (!env->allow_ptr_leaks && !known && (smin_val < 0) != (smax_val < 0)) {
5337			verbose(env, "R%d has unknown scalar with mixed signed bounds, pointer arithmetic with it prohibited for !root\n",
5338				off_reg == dst_reg ? dst : src);
5339			return -EACCES;
5340		}
5341		/* fall-through */
5342	default:
5343		break;
5344	}
5345
5346	/* In case of 'scalar += pointer', dst_reg inherits pointer type and id.
5347	 * The id may be overwritten later if we create a new variable offset.
5348	 */
5349	dst_reg->type = ptr_reg->type;
5350	dst_reg->id = ptr_reg->id;
5351
5352	if (!check_reg_sane_offset(env, off_reg, ptr_reg->type) ||
5353	    !check_reg_sane_offset(env, ptr_reg, ptr_reg->type))
5354		return -EINVAL;
5355
5356	/* pointer types do not carry 32-bit bounds at the moment.
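	 *
	 * As an aside, from BPF-C the 'pointer += scalar' pattern this
	 * function tracks typically looks like (illustrative; assumes an
	 * ARRAY map 'blob' with a 256-byte value and some untrusted index):
	 *
	 *	__u8 *p = bpf_map_lookup_elem(&blob, &key);
	 *	__u32 idx = untrusted_index();	// unknown scalar (hypothetical)
	 *
	 *	if (!p)
	 *		return 0;
	 *	idx &= 0xfc;		// umax_value = 252, var_off multiple of 4
	 *	p += idx;		// dst keeps PTR_TO_MAP_VALUE; idx becomes
	 *				// the variable offset, fixed off stays
	 *	*(__u32 *)p = 1;	// [p, p+4) provably within the 256-byte value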
*/ 5357 __mark_reg32_unbounded(dst_reg); 5358 5359 switch (opcode) { 5360 case BPF_ADD: 5361 ret = sanitize_ptr_alu(env, insn, ptr_reg, dst_reg, smin_val < 0); 5362 if (ret < 0) { 5363 verbose(env, "R%d tried to add from different maps or paths\n", dst); 5364 return ret; 5365 } 5366 /* We can take a fixed offset as long as it doesn't overflow 5367 * the s32 'off' field 5368 */ 5369 if (known && (ptr_reg->off + smin_val == 5370 (s64)(s32)(ptr_reg->off + smin_val))) { 5371 /* pointer += K. Accumulate it into fixed offset */ 5372 dst_reg->smin_value = smin_ptr; 5373 dst_reg->smax_value = smax_ptr; 5374 dst_reg->umin_value = umin_ptr; 5375 dst_reg->umax_value = umax_ptr; 5376 dst_reg->var_off = ptr_reg->var_off; 5377 dst_reg->off = ptr_reg->off + smin_val; 5378 dst_reg->raw = ptr_reg->raw; 5379 break; 5380 } 5381 /* A new variable offset is created. Note that off_reg->off 5382 * == 0, since it's a scalar. 5383 * dst_reg gets the pointer type and since some positive 5384 * integer value was added to the pointer, give it a new 'id' 5385 * if it's a PTR_TO_PACKET. 5386 * this creates a new 'base' pointer, off_reg (variable) gets 5387 * added into the variable offset, and we copy the fixed offset 5388 * from ptr_reg. 5389 */ 5390 if (signed_add_overflows(smin_ptr, smin_val) || 5391 signed_add_overflows(smax_ptr, smax_val)) { 5392 dst_reg->smin_value = S64_MIN; 5393 dst_reg->smax_value = S64_MAX; 5394 } else { 5395 dst_reg->smin_value = smin_ptr + smin_val; 5396 dst_reg->smax_value = smax_ptr + smax_val; 5397 } 5398 if (umin_ptr + umin_val < umin_ptr || 5399 umax_ptr + umax_val < umax_ptr) { 5400 dst_reg->umin_value = 0; 5401 dst_reg->umax_value = U64_MAX; 5402 } else { 5403 dst_reg->umin_value = umin_ptr + umin_val; 5404 dst_reg->umax_value = umax_ptr + umax_val; 5405 } 5406 dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off); 5407 dst_reg->off = ptr_reg->off; 5408 dst_reg->raw = ptr_reg->raw; 5409 if (reg_is_pkt_pointer(ptr_reg)) { 5410 dst_reg->id = ++env->id_gen; 5411 /* something was added to pkt_ptr, set range to zero */ 5412 dst_reg->raw = 0; 5413 } 5414 break; 5415 case BPF_SUB: 5416 ret = sanitize_ptr_alu(env, insn, ptr_reg, dst_reg, smin_val < 0); 5417 if (ret < 0) { 5418 verbose(env, "R%d tried to sub from different maps or paths\n", dst); 5419 return ret; 5420 } 5421 if (dst_reg == off_reg) { 5422 /* scalar -= pointer. Creates an unknown scalar */ 5423 verbose(env, "R%d tried to subtract pointer from scalar\n", 5424 dst); 5425 return -EACCES; 5426 } 5427 /* We don't allow subtraction from FP, because (according to 5428 * test_verifier.c test "invalid fp arithmetic", JITs might not 5429 * be able to deal with it. 5430 */ 5431 if (ptr_reg->type == PTR_TO_STACK) { 5432 verbose(env, "R%d subtraction from stack pointer prohibited\n", 5433 dst); 5434 return -EACCES; 5435 } 5436 if (known && (ptr_reg->off - smin_val == 5437 (s64)(s32)(ptr_reg->off - smin_val))) { 5438 /* pointer -= K. Subtract it from fixed offset */ 5439 dst_reg->smin_value = smin_ptr; 5440 dst_reg->smax_value = smax_ptr; 5441 dst_reg->umin_value = umin_ptr; 5442 dst_reg->umax_value = umax_ptr; 5443 dst_reg->var_off = ptr_reg->var_off; 5444 dst_reg->id = ptr_reg->id; 5445 dst_reg->off = ptr_reg->off - smin_val; 5446 dst_reg->raw = ptr_reg->raw; 5447 break; 5448 } 5449 /* A new variable offset is created. If the subtrahend is known 5450 * nonnegative, then any reg->range we had before is still good. 
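 *
 * A small worked example of the arithmetic below (hypothetical bounds,
 * purely illustrative): if the pointer's variable part is known to be
 * exactly 16 (smin_ptr = smax_ptr = 16) and the scalar subtrahend lies
 * in [0, 8], then
 *
 *	smin_value = smin_ptr - smax_val = 16 - 8 = 8
 *	smax_value = smax_ptr - smin_val = 16 - 0 = 16
 *
 * and since umin_ptr (16) >= umax_val (8) the unsigned side cannot wrap:
 *
 *	umin_value = umin_ptr - umax_val = 8
 *	umax_value = umax_ptr - umin_val = 16
 *
 * i.e. the result is provably within [ptr - 8, ptr], as expected.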
5451 */ 5452 if (signed_sub_overflows(smin_ptr, smax_val) || 5453 signed_sub_overflows(smax_ptr, smin_val)) { 5454 /* Overflow possible, we know nothing */ 5455 dst_reg->smin_value = S64_MIN; 5456 dst_reg->smax_value = S64_MAX; 5457 } else { 5458 dst_reg->smin_value = smin_ptr - smax_val; 5459 dst_reg->smax_value = smax_ptr - smin_val; 5460 } 5461 if (umin_ptr < umax_val) { 5462 /* Overflow possible, we know nothing */ 5463 dst_reg->umin_value = 0; 5464 dst_reg->umax_value = U64_MAX; 5465 } else { 5466 /* Cannot overflow (as long as bounds are consistent) */ 5467 dst_reg->umin_value = umin_ptr - umax_val; 5468 dst_reg->umax_value = umax_ptr - umin_val; 5469 } 5470 dst_reg->var_off = tnum_sub(ptr_reg->var_off, off_reg->var_off); 5471 dst_reg->off = ptr_reg->off; 5472 dst_reg->raw = ptr_reg->raw; 5473 if (reg_is_pkt_pointer(ptr_reg)) { 5474 dst_reg->id = ++env->id_gen; 5475 /* something was added to pkt_ptr, set range to zero */ 5476 if (smin_val < 0) 5477 dst_reg->raw = 0; 5478 } 5479 break; 5480 case BPF_AND: 5481 case BPF_OR: 5482 case BPF_XOR: 5483 /* bitwise ops on pointers are troublesome, prohibit. */ 5484 verbose(env, "R%d bitwise operator %s on pointer prohibited\n", 5485 dst, bpf_alu_string[opcode >> 4]); 5486 return -EACCES; 5487 default: 5488 /* other operators (e.g. MUL,LSH) produce non-pointer results */ 5489 verbose(env, "R%d pointer arithmetic with %s operator prohibited\n", 5490 dst, bpf_alu_string[opcode >> 4]); 5491 return -EACCES; 5492 } 5493 5494 if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type)) 5495 return -EINVAL; 5496 5497 __update_reg_bounds(dst_reg); 5498 __reg_deduce_bounds(dst_reg); 5499 __reg_bound_offset(dst_reg); 5500 5501 /* For unprivileged we require that resulting offset must be in bounds 5502 * in order to be able to sanitize access later on. 
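 *
 * For instance (hypothetical fragment, unprivileged loader, map
 * value_size of 8):
 *
 *	r2 = <PTR_TO_MAP_VALUE>	// off = 0
 *	r2 += 4096		// off = 4096, far outside the value
 *
 * The check_map_access() call below sees the out-of-range fixed offset
 * and rejects the ALU instruction itself, instead of deferring the
 * error to a later load or store through r2.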
5503 */ 5504 if (!env->bypass_spec_v1) { 5505 if (dst_reg->type == PTR_TO_MAP_VALUE && 5506 check_map_access(env, dst, dst_reg->off, 1, false)) { 5507 verbose(env, "R%d pointer arithmetic of map value goes out of range, " 5508 "prohibited for !root\n", dst); 5509 return -EACCES; 5510 } else if (dst_reg->type == PTR_TO_STACK && 5511 check_stack_access(env, dst_reg, dst_reg->off + 5512 dst_reg->var_off.value, 1)) { 5513 verbose(env, "R%d stack pointer arithmetic goes out of range, " 5514 "prohibited for !root\n", dst); 5515 return -EACCES; 5516 } 5517 } 5518 5519 return 0; 5520 } 5521 5522 static void scalar32_min_max_add(struct bpf_reg_state *dst_reg, 5523 struct bpf_reg_state *src_reg) 5524 { 5525 s32 smin_val = src_reg->s32_min_value; 5526 s32 smax_val = src_reg->s32_max_value; 5527 u32 umin_val = src_reg->u32_min_value; 5528 u32 umax_val = src_reg->u32_max_value; 5529 5530 if (signed_add32_overflows(dst_reg->s32_min_value, smin_val) || 5531 signed_add32_overflows(dst_reg->s32_max_value, smax_val)) { 5532 dst_reg->s32_min_value = S32_MIN; 5533 dst_reg->s32_max_value = S32_MAX; 5534 } else { 5535 dst_reg->s32_min_value += smin_val; 5536 dst_reg->s32_max_value += smax_val; 5537 } 5538 if (dst_reg->u32_min_value + umin_val < umin_val || 5539 dst_reg->u32_max_value + umax_val < umax_val) { 5540 dst_reg->u32_min_value = 0; 5541 dst_reg->u32_max_value = U32_MAX; 5542 } else { 5543 dst_reg->u32_min_value += umin_val; 5544 dst_reg->u32_max_value += umax_val; 5545 } 5546 } 5547 5548 static void scalar_min_max_add(struct bpf_reg_state *dst_reg, 5549 struct bpf_reg_state *src_reg) 5550 { 5551 s64 smin_val = src_reg->smin_value; 5552 s64 smax_val = src_reg->smax_value; 5553 u64 umin_val = src_reg->umin_value; 5554 u64 umax_val = src_reg->umax_value; 5555 5556 if (signed_add_overflows(dst_reg->smin_value, smin_val) || 5557 signed_add_overflows(dst_reg->smax_value, smax_val)) { 5558 dst_reg->smin_value = S64_MIN; 5559 dst_reg->smax_value = S64_MAX; 5560 } else { 5561 dst_reg->smin_value += smin_val; 5562 dst_reg->smax_value += smax_val; 5563 } 5564 if (dst_reg->umin_value + umin_val < umin_val || 5565 dst_reg->umax_value + umax_val < umax_val) { 5566 dst_reg->umin_value = 0; 5567 dst_reg->umax_value = U64_MAX; 5568 } else { 5569 dst_reg->umin_value += umin_val; 5570 dst_reg->umax_value += umax_val; 5571 } 5572 } 5573 5574 static void scalar32_min_max_sub(struct bpf_reg_state *dst_reg, 5575 struct bpf_reg_state *src_reg) 5576 { 5577 s32 smin_val = src_reg->s32_min_value; 5578 s32 smax_val = src_reg->s32_max_value; 5579 u32 umin_val = src_reg->u32_min_value; 5580 u32 umax_val = src_reg->u32_max_value; 5581 5582 if (signed_sub32_overflows(dst_reg->s32_min_value, smax_val) || 5583 signed_sub32_overflows(dst_reg->s32_max_value, smin_val)) { 5584 /* Overflow possible, we know nothing */ 5585 dst_reg->s32_min_value = S32_MIN; 5586 dst_reg->s32_max_value = S32_MAX; 5587 } else { 5588 dst_reg->s32_min_value -= smax_val; 5589 dst_reg->s32_max_value -= smin_val; 5590 } 5591 if (dst_reg->u32_min_value < umax_val) { 5592 /* Overflow possible, we know nothing */ 5593 dst_reg->u32_min_value = 0; 5594 dst_reg->u32_max_value = U32_MAX; 5595 } else { 5596 /* Cannot overflow (as long as bounds are consistent) */ 5597 dst_reg->u32_min_value -= umax_val; 5598 dst_reg->u32_max_value -= umin_val; 5599 } 5600 } 5601 5602 static void scalar_min_max_sub(struct bpf_reg_state *dst_reg, 5603 struct bpf_reg_state *src_reg) 5604 { 5605 s64 smin_val = src_reg->smin_value; 5606 s64 smax_val = src_reg->smax_value; 5607 u64 umin_val = 
src_reg->umin_value; 5608 u64 umax_val = src_reg->umax_value; 5609 5610 if (signed_sub_overflows(dst_reg->smin_value, smax_val) || 5611 signed_sub_overflows(dst_reg->smax_value, smin_val)) { 5612 /* Overflow possible, we know nothing */ 5613 dst_reg->smin_value = S64_MIN; 5614 dst_reg->smax_value = S64_MAX; 5615 } else { 5616 dst_reg->smin_value -= smax_val; 5617 dst_reg->smax_value -= smin_val; 5618 } 5619 if (dst_reg->umin_value < umax_val) { 5620 /* Overflow possible, we know nothing */ 5621 dst_reg->umin_value = 0; 5622 dst_reg->umax_value = U64_MAX; 5623 } else { 5624 /* Cannot overflow (as long as bounds are consistent) */ 5625 dst_reg->umin_value -= umax_val; 5626 dst_reg->umax_value -= umin_val; 5627 } 5628 } 5629 5630 static void scalar32_min_max_mul(struct bpf_reg_state *dst_reg, 5631 struct bpf_reg_state *src_reg) 5632 { 5633 s32 smin_val = src_reg->s32_min_value; 5634 u32 umin_val = src_reg->u32_min_value; 5635 u32 umax_val = src_reg->u32_max_value; 5636 5637 if (smin_val < 0 || dst_reg->s32_min_value < 0) { 5638 /* Ain't nobody got time to multiply that sign */ 5639 __mark_reg32_unbounded(dst_reg); 5640 return; 5641 } 5642 /* Both values are positive, so we can work with unsigned and 5643 * copy the result to signed (unless it exceeds S32_MAX). 5644 */ 5645 if (umax_val > U16_MAX || dst_reg->u32_max_value > U16_MAX) { 5646 /* Potential overflow, we know nothing */ 5647 __mark_reg32_unbounded(dst_reg); 5648 return; 5649 } 5650 dst_reg->u32_min_value *= umin_val; 5651 dst_reg->u32_max_value *= umax_val; 5652 if (dst_reg->u32_max_value > S32_MAX) { 5653 /* Overflow possible, we know nothing */ 5654 dst_reg->s32_min_value = S32_MIN; 5655 dst_reg->s32_max_value = S32_MAX; 5656 } else { 5657 dst_reg->s32_min_value = dst_reg->u32_min_value; 5658 dst_reg->s32_max_value = dst_reg->u32_max_value; 5659 } 5660 } 5661 5662 static void scalar_min_max_mul(struct bpf_reg_state *dst_reg, 5663 struct bpf_reg_state *src_reg) 5664 { 5665 s64 smin_val = src_reg->smin_value; 5666 u64 umin_val = src_reg->umin_value; 5667 u64 umax_val = src_reg->umax_value; 5668 5669 if (smin_val < 0 || dst_reg->smin_value < 0) { 5670 /* Ain't nobody got time to multiply that sign */ 5671 __mark_reg64_unbounded(dst_reg); 5672 return; 5673 } 5674 /* Both values are positive, so we can work with unsigned and 5675 * copy the result to signed (unless it exceeds S64_MAX). 5676 */ 5677 if (umax_val > U32_MAX || dst_reg->umax_value > U32_MAX) { 5678 /* Potential overflow, we know nothing */ 5679 __mark_reg64_unbounded(dst_reg); 5680 return; 5681 } 5682 dst_reg->umin_value *= umin_val; 5683 dst_reg->umax_value *= umax_val; 5684 if (dst_reg->umax_value > S64_MAX) { 5685 /* Overflow possible, we know nothing */ 5686 dst_reg->smin_value = S64_MIN; 5687 dst_reg->smax_value = S64_MAX; 5688 } else { 5689 dst_reg->smin_value = dst_reg->umin_value; 5690 dst_reg->smax_value = dst_reg->umax_value; 5691 } 5692 } 5693 5694 static void scalar32_min_max_and(struct bpf_reg_state *dst_reg, 5695 struct bpf_reg_state *src_reg) 5696 { 5697 bool src_known = tnum_subreg_is_const(src_reg->var_off); 5698 bool dst_known = tnum_subreg_is_const(dst_reg->var_off); 5699 struct tnum var32_off = tnum_subreg(dst_reg->var_off); 5700 s32 smin_val = src_reg->s32_min_value; 5701 u32 umax_val = src_reg->u32_max_value; 5702 5703 /* Assuming scalar64_min_max_and will be called so its safe 5704 * to skip updating register for known 32-bit case. 
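 *
 * Example of the derivation below (illustrative numbers only): if the
 * destination subreg is known to be at most 0xff and the source subreg
 * is the constant 0x0f0f, then
 *
 *	u32_min_value = var32_off.value		(bits proven 1 in the result)
 *	u32_max_value = min(0xff, 0x0f0f) = 0xff
 *
 * AND can only clear bits, so taking the smaller of the two maxima is
 * always sound, and the result's tnum supplies whatever minimum can be
 * proven.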
5705 */ 5706 if (src_known && dst_known) 5707 return; 5708 5709 /* We get our minimum from the var_off, since that's inherently 5710 * bitwise. Our maximum is the minimum of the operands' maxima. 5711 */ 5712 dst_reg->u32_min_value = var32_off.value; 5713 dst_reg->u32_max_value = min(dst_reg->u32_max_value, umax_val); 5714 if (dst_reg->s32_min_value < 0 || smin_val < 0) { 5715 /* Lose signed bounds when ANDing negative numbers, 5716 * ain't nobody got time for that. 5717 */ 5718 dst_reg->s32_min_value = S32_MIN; 5719 dst_reg->s32_max_value = S32_MAX; 5720 } else { 5721 /* ANDing two positives gives a positive, so safe to 5722 * cast result into s64. 5723 */ 5724 dst_reg->s32_min_value = dst_reg->u32_min_value; 5725 dst_reg->s32_max_value = dst_reg->u32_max_value; 5726 } 5727 5728 } 5729 5730 static void scalar_min_max_and(struct bpf_reg_state *dst_reg, 5731 struct bpf_reg_state *src_reg) 5732 { 5733 bool src_known = tnum_is_const(src_reg->var_off); 5734 bool dst_known = tnum_is_const(dst_reg->var_off); 5735 s64 smin_val = src_reg->smin_value; 5736 u64 umax_val = src_reg->umax_value; 5737 5738 if (src_known && dst_known) { 5739 __mark_reg_known(dst_reg, dst_reg->var_off.value & 5740 src_reg->var_off.value); 5741 return; 5742 } 5743 5744 /* We get our minimum from the var_off, since that's inherently 5745 * bitwise. Our maximum is the minimum of the operands' maxima. 5746 */ 5747 dst_reg->umin_value = dst_reg->var_off.value; 5748 dst_reg->umax_value = min(dst_reg->umax_value, umax_val); 5749 if (dst_reg->smin_value < 0 || smin_val < 0) { 5750 /* Lose signed bounds when ANDing negative numbers, 5751 * ain't nobody got time for that. 5752 */ 5753 dst_reg->smin_value = S64_MIN; 5754 dst_reg->smax_value = S64_MAX; 5755 } else { 5756 /* ANDing two positives gives a positive, so safe to 5757 * cast result into s64. 5758 */ 5759 dst_reg->smin_value = dst_reg->umin_value; 5760 dst_reg->smax_value = dst_reg->umax_value; 5761 } 5762 /* We may learn something more from the var_off */ 5763 __update_reg_bounds(dst_reg); 5764 } 5765 5766 static void scalar32_min_max_or(struct bpf_reg_state *dst_reg, 5767 struct bpf_reg_state *src_reg) 5768 { 5769 bool src_known = tnum_subreg_is_const(src_reg->var_off); 5770 bool dst_known = tnum_subreg_is_const(dst_reg->var_off); 5771 struct tnum var32_off = tnum_subreg(dst_reg->var_off); 5772 s32 smin_val = src_reg->smin_value; 5773 u32 umin_val = src_reg->umin_value; 5774 5775 /* Assuming scalar64_min_max_or will be called so it is safe 5776 * to skip updating register for known case. 5777 */ 5778 if (src_known && dst_known) 5779 return; 5780 5781 /* We get our maximum from the var_off, and our minimum is the 5782 * maximum of the operands' minima 5783 */ 5784 dst_reg->u32_min_value = max(dst_reg->u32_min_value, umin_val); 5785 dst_reg->u32_max_value = var32_off.value | var32_off.mask; 5786 if (dst_reg->s32_min_value < 0 || smin_val < 0) { 5787 /* Lose signed bounds when ORing negative numbers, 5788 * ain't nobody got time for that. 5789 */ 5790 dst_reg->s32_min_value = S32_MIN; 5791 dst_reg->s32_max_value = S32_MAX; 5792 } else { 5793 /* ORing two positives gives a positive, so safe to 5794 * cast result into s64. 
5795 */ 5796 dst_reg->s32_min_value = dst_reg->umin_value; 5797 dst_reg->s32_max_value = dst_reg->umax_value; 5798 } 5799 } 5800 5801 static void scalar_min_max_or(struct bpf_reg_state *dst_reg, 5802 struct bpf_reg_state *src_reg) 5803 { 5804 bool src_known = tnum_is_const(src_reg->var_off); 5805 bool dst_known = tnum_is_const(dst_reg->var_off); 5806 s64 smin_val = src_reg->smin_value; 5807 u64 umin_val = src_reg->umin_value; 5808 5809 if (src_known && dst_known) { 5810 __mark_reg_known(dst_reg, dst_reg->var_off.value | 5811 src_reg->var_off.value); 5812 return; 5813 } 5814 5815 /* We get our maximum from the var_off, and our minimum is the 5816 * maximum of the operands' minima 5817 */ 5818 dst_reg->umin_value = max(dst_reg->umin_value, umin_val); 5819 dst_reg->umax_value = dst_reg->var_off.value | dst_reg->var_off.mask; 5820 if (dst_reg->smin_value < 0 || smin_val < 0) { 5821 /* Lose signed bounds when ORing negative numbers, 5822 * ain't nobody got time for that. 5823 */ 5824 dst_reg->smin_value = S64_MIN; 5825 dst_reg->smax_value = S64_MAX; 5826 } else { 5827 /* ORing two positives gives a positive, so safe to 5828 * cast result into s64. 5829 */ 5830 dst_reg->smin_value = dst_reg->umin_value; 5831 dst_reg->smax_value = dst_reg->umax_value; 5832 } 5833 /* We may learn something more from the var_off */ 5834 __update_reg_bounds(dst_reg); 5835 } 5836 5837 static void scalar32_min_max_xor(struct bpf_reg_state *dst_reg, 5838 struct bpf_reg_state *src_reg) 5839 { 5840 bool src_known = tnum_subreg_is_const(src_reg->var_off); 5841 bool dst_known = tnum_subreg_is_const(dst_reg->var_off); 5842 struct tnum var32_off = tnum_subreg(dst_reg->var_off); 5843 s32 smin_val = src_reg->s32_min_value; 5844 5845 /* Assuming scalar64_min_max_xor will be called so it is safe 5846 * to skip updating register for known case. 5847 */ 5848 if (src_known && dst_known) 5849 return; 5850 5851 /* We get both minimum and maximum from the var32_off. */ 5852 dst_reg->u32_min_value = var32_off.value; 5853 dst_reg->u32_max_value = var32_off.value | var32_off.mask; 5854 5855 if (dst_reg->s32_min_value >= 0 && smin_val >= 0) { 5856 /* XORing two positive sign numbers gives a positive, 5857 * so safe to cast u32 result into s32. 5858 */ 5859 dst_reg->s32_min_value = dst_reg->u32_min_value; 5860 dst_reg->s32_max_value = dst_reg->u32_max_value; 5861 } else { 5862 dst_reg->s32_min_value = S32_MIN; 5863 dst_reg->s32_max_value = S32_MAX; 5864 } 5865 } 5866 5867 static void scalar_min_max_xor(struct bpf_reg_state *dst_reg, 5868 struct bpf_reg_state *src_reg) 5869 { 5870 bool src_known = tnum_is_const(src_reg->var_off); 5871 bool dst_known = tnum_is_const(dst_reg->var_off); 5872 s64 smin_val = src_reg->smin_value; 5873 5874 if (src_known && dst_known) { 5875 /* dst_reg->var_off.value has been updated earlier */ 5876 __mark_reg_known(dst_reg, dst_reg->var_off.value); 5877 return; 5878 } 5879 5880 /* We get both minimum and maximum from the var_off. */ 5881 dst_reg->umin_value = dst_reg->var_off.value; 5882 dst_reg->umax_value = dst_reg->var_off.value | dst_reg->var_off.mask; 5883 5884 if (dst_reg->smin_value >= 0 && smin_val >= 0) { 5885 /* XORing two positive sign numbers gives a positive, 5886 * so safe to cast u64 result into s64. 
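 *
 * (If bit 63 is clear in both operands it is clear in the XOR as well,
 * e.g. 0x7000000000000000 ^ 0x0fffffffffffffff = 0x7fffffffffffffff,
 * so copying the unsigned interval into smin/smax cannot yield a
 * negative value.)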
5887 */ 5888 dst_reg->smin_value = dst_reg->umin_value; 5889 dst_reg->smax_value = dst_reg->umax_value; 5890 } else { 5891 dst_reg->smin_value = S64_MIN; 5892 dst_reg->smax_value = S64_MAX; 5893 } 5894 5895 __update_reg_bounds(dst_reg); 5896 } 5897 5898 static void __scalar32_min_max_lsh(struct bpf_reg_state *dst_reg, 5899 u64 umin_val, u64 umax_val) 5900 { 5901 /* We lose all sign bit information (except what we can pick 5902 * up from var_off) 5903 */ 5904 dst_reg->s32_min_value = S32_MIN; 5905 dst_reg->s32_max_value = S32_MAX; 5906 /* If we might shift our top bit out, then we know nothing */ 5907 if (umax_val > 31 || dst_reg->u32_max_value > 1ULL << (31 - umax_val)) { 5908 dst_reg->u32_min_value = 0; 5909 dst_reg->u32_max_value = U32_MAX; 5910 } else { 5911 dst_reg->u32_min_value <<= umin_val; 5912 dst_reg->u32_max_value <<= umax_val; 5913 } 5914 } 5915 5916 static void scalar32_min_max_lsh(struct bpf_reg_state *dst_reg, 5917 struct bpf_reg_state *src_reg) 5918 { 5919 u32 umax_val = src_reg->u32_max_value; 5920 u32 umin_val = src_reg->u32_min_value; 5921 /* u32 alu operation will zext upper bits */ 5922 struct tnum subreg = tnum_subreg(dst_reg->var_off); 5923 5924 __scalar32_min_max_lsh(dst_reg, umin_val, umax_val); 5925 dst_reg->var_off = tnum_subreg(tnum_lshift(subreg, umin_val)); 5926 /* Not required but being careful mark reg64 bounds as unknown so 5927 * that we are forced to pick them up from tnum and zext later and 5928 * if some path skips this step we are still safe. 5929 */ 5930 __mark_reg64_unbounded(dst_reg); 5931 __update_reg32_bounds(dst_reg); 5932 } 5933 5934 static void __scalar64_min_max_lsh(struct bpf_reg_state *dst_reg, 5935 u64 umin_val, u64 umax_val) 5936 { 5937 /* Special case <<32 because it is a common compiler pattern to sign 5938 * extend subreg by doing <<32 s>>32. In this case if 32bit bounds are 5939 * positive we know this shift will also be positive so we can track 5940 * bounds correctly. Otherwise we lose all sign bit information except 5941 * what we can pick up from var_off. Perhaps we can generalize this 5942 * later to shifts of any length. 
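 *
 * The pattern in question, as the compiler emits it for a 32->64 sign
 * extension (illustrative fragment):
 *
 *	r1 <<= 32
 *	r1 s>>= 32
 *
 * If r1 is known to be in [0, 100] before the shifts, the special case
 * here keeps smin/smax at [0, 100 << 32] across the left shift, and the
 * following arithmetic right shift brings the bounds back to [0, 100]
 * instead of collapsing them to completely unknown.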
5943 */ 5944 if (umin_val == 32 && umax_val == 32 && dst_reg->s32_max_value >= 0) 5945 dst_reg->smax_value = (s64)dst_reg->s32_max_value << 32; 5946 else 5947 dst_reg->smax_value = S64_MAX; 5948 5949 if (umin_val == 32 && umax_val == 32 && dst_reg->s32_min_value >= 0) 5950 dst_reg->smin_value = (s64)dst_reg->s32_min_value << 32; 5951 else 5952 dst_reg->smin_value = S64_MIN; 5953 5954 /* If we might shift our top bit out, then we know nothing */ 5955 if (dst_reg->umax_value > 1ULL << (63 - umax_val)) { 5956 dst_reg->umin_value = 0; 5957 dst_reg->umax_value = U64_MAX; 5958 } else { 5959 dst_reg->umin_value <<= umin_val; 5960 dst_reg->umax_value <<= umax_val; 5961 } 5962 } 5963 5964 static void scalar_min_max_lsh(struct bpf_reg_state *dst_reg, 5965 struct bpf_reg_state *src_reg) 5966 { 5967 u64 umax_val = src_reg->umax_value; 5968 u64 umin_val = src_reg->umin_value; 5969 5970 /* scalar64 calc uses 32bit unshifted bounds so must be called first */ 5971 __scalar64_min_max_lsh(dst_reg, umin_val, umax_val); 5972 __scalar32_min_max_lsh(dst_reg, umin_val, umax_val); 5973 5974 dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val); 5975 /* We may learn something more from the var_off */ 5976 __update_reg_bounds(dst_reg); 5977 } 5978 5979 static void scalar32_min_max_rsh(struct bpf_reg_state *dst_reg, 5980 struct bpf_reg_state *src_reg) 5981 { 5982 struct tnum subreg = tnum_subreg(dst_reg->var_off); 5983 u32 umax_val = src_reg->u32_max_value; 5984 u32 umin_val = src_reg->u32_min_value; 5985 5986 /* BPF_RSH is an unsigned shift. If the value in dst_reg might 5987 * be negative, then either: 5988 * 1) src_reg might be zero, so the sign bit of the result is 5989 * unknown, so we lose our signed bounds 5990 * 2) it's known negative, thus the unsigned bounds capture the 5991 * signed bounds 5992 * 3) the signed bounds cross zero, so they tell us nothing 5993 * about the result 5994 * If the value in dst_reg is known nonnegative, then again the 5995 * unsigned bounts capture the signed bounds. 5996 * Thus, in all cases it suffices to blow away our signed bounds 5997 * and rely on inferring new ones from the unsigned bounds and 5998 * var_off of the result. 5999 */ 6000 dst_reg->s32_min_value = S32_MIN; 6001 dst_reg->s32_max_value = S32_MAX; 6002 6003 dst_reg->var_off = tnum_rshift(subreg, umin_val); 6004 dst_reg->u32_min_value >>= umax_val; 6005 dst_reg->u32_max_value >>= umin_val; 6006 6007 __mark_reg64_unbounded(dst_reg); 6008 __update_reg32_bounds(dst_reg); 6009 } 6010 6011 static void scalar_min_max_rsh(struct bpf_reg_state *dst_reg, 6012 struct bpf_reg_state *src_reg) 6013 { 6014 u64 umax_val = src_reg->umax_value; 6015 u64 umin_val = src_reg->umin_value; 6016 6017 /* BPF_RSH is an unsigned shift. If the value in dst_reg might 6018 * be negative, then either: 6019 * 1) src_reg might be zero, so the sign bit of the result is 6020 * unknown, so we lose our signed bounds 6021 * 2) it's known negative, thus the unsigned bounds capture the 6022 * signed bounds 6023 * 3) the signed bounds cross zero, so they tell us nothing 6024 * about the result 6025 * If the value in dst_reg is known nonnegative, then again the 6026 * unsigned bounts capture the signed bounds. 6027 * Thus, in all cases it suffices to blow away our signed bounds 6028 * and rely on inferring new ones from the unsigned bounds and 6029 * var_off of the result. 
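 *
 * Concrete illustration (hypothetical bounds): with dst in [-2, 2] and
 * a shift by exactly 1, the input -2 is 0xfffffffffffffffe when viewed
 * as unsigned, so the possible results include both 0 and
 * 0x7fffffffffffffff. The pre-shift signed interval says nothing useful
 * about that spread, which is why only the unsigned bounds are shifted
 * below (umin by the largest possible amount, umax by the smallest) and
 * the signed bounds are re-derived from them afterwards.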
6030 */ 6031 dst_reg->smin_value = S64_MIN; 6032 dst_reg->smax_value = S64_MAX; 6033 dst_reg->var_off = tnum_rshift(dst_reg->var_off, umin_val); 6034 dst_reg->umin_value >>= umax_val; 6035 dst_reg->umax_value >>= umin_val; 6036 6037 /* Its not easy to operate on alu32 bounds here because it depends 6038 * on bits being shifted in. Take easy way out and mark unbounded 6039 * so we can recalculate later from tnum. 6040 */ 6041 __mark_reg32_unbounded(dst_reg); 6042 __update_reg_bounds(dst_reg); 6043 } 6044 6045 static void scalar32_min_max_arsh(struct bpf_reg_state *dst_reg, 6046 struct bpf_reg_state *src_reg) 6047 { 6048 u64 umin_val = src_reg->u32_min_value; 6049 6050 /* Upon reaching here, src_known is true and 6051 * umax_val is equal to umin_val. 6052 */ 6053 dst_reg->s32_min_value = (u32)(((s32)dst_reg->s32_min_value) >> umin_val); 6054 dst_reg->s32_max_value = (u32)(((s32)dst_reg->s32_max_value) >> umin_val); 6055 6056 dst_reg->var_off = tnum_arshift(tnum_subreg(dst_reg->var_off), umin_val, 32); 6057 6058 /* blow away the dst_reg umin_value/umax_value and rely on 6059 * dst_reg var_off to refine the result. 6060 */ 6061 dst_reg->u32_min_value = 0; 6062 dst_reg->u32_max_value = U32_MAX; 6063 6064 __mark_reg64_unbounded(dst_reg); 6065 __update_reg32_bounds(dst_reg); 6066 } 6067 6068 static void scalar_min_max_arsh(struct bpf_reg_state *dst_reg, 6069 struct bpf_reg_state *src_reg) 6070 { 6071 u64 umin_val = src_reg->umin_value; 6072 6073 /* Upon reaching here, src_known is true and umax_val is equal 6074 * to umin_val. 6075 */ 6076 dst_reg->smin_value >>= umin_val; 6077 dst_reg->smax_value >>= umin_val; 6078 6079 dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_val, 64); 6080 6081 /* blow away the dst_reg umin_value/umax_value and rely on 6082 * dst_reg var_off to refine the result. 6083 */ 6084 dst_reg->umin_value = 0; 6085 dst_reg->umax_value = U64_MAX; 6086 6087 /* Its not easy to operate on alu32 bounds here because it depends 6088 * on bits being shifted in from upper 32-bits. Take easy way out 6089 * and mark unbounded so we can recalculate later from tnum. 6090 */ 6091 __mark_reg32_unbounded(dst_reg); 6092 __update_reg_bounds(dst_reg); 6093 } 6094 6095 /* WARNING: This function does calculations on 64-bit values, but the actual 6096 * execution may occur on 32-bit values. Therefore, things like bitshifts 6097 * need extra checks in the 32-bit case. 6098 */ 6099 static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, 6100 struct bpf_insn *insn, 6101 struct bpf_reg_state *dst_reg, 6102 struct bpf_reg_state src_reg) 6103 { 6104 struct bpf_reg_state *regs = cur_regs(env); 6105 u8 opcode = BPF_OP(insn->code); 6106 bool src_known; 6107 s64 smin_val, smax_val; 6108 u64 umin_val, umax_val; 6109 s32 s32_min_val, s32_max_val; 6110 u32 u32_min_val, u32_max_val; 6111 u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 
64 : 32; 6112 u32 dst = insn->dst_reg; 6113 int ret; 6114 bool alu32 = (BPF_CLASS(insn->code) != BPF_ALU64); 6115 6116 smin_val = src_reg.smin_value; 6117 smax_val = src_reg.smax_value; 6118 umin_val = src_reg.umin_value; 6119 umax_val = src_reg.umax_value; 6120 6121 s32_min_val = src_reg.s32_min_value; 6122 s32_max_val = src_reg.s32_max_value; 6123 u32_min_val = src_reg.u32_min_value; 6124 u32_max_val = src_reg.u32_max_value; 6125 6126 if (alu32) { 6127 src_known = tnum_subreg_is_const(src_reg.var_off); 6128 if ((src_known && 6129 (s32_min_val != s32_max_val || u32_min_val != u32_max_val)) || 6130 s32_min_val > s32_max_val || u32_min_val > u32_max_val) { 6131 /* Taint dst register if offset had invalid bounds 6132 * derived from e.g. dead branches. 6133 */ 6134 __mark_reg_unknown(env, dst_reg); 6135 return 0; 6136 } 6137 } else { 6138 src_known = tnum_is_const(src_reg.var_off); 6139 if ((src_known && 6140 (smin_val != smax_val || umin_val != umax_val)) || 6141 smin_val > smax_val || umin_val > umax_val) { 6142 /* Taint dst register if offset had invalid bounds 6143 * derived from e.g. dead branches. 6144 */ 6145 __mark_reg_unknown(env, dst_reg); 6146 return 0; 6147 } 6148 } 6149 6150 if (!src_known && 6151 opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) { 6152 __mark_reg_unknown(env, dst_reg); 6153 return 0; 6154 } 6155 6156 /* Calculate sign/unsigned bounds and tnum for alu32 and alu64 bit ops. 6157 * There are two classes of instructions: for the first class we track 6158 * both alu32 and alu64 sign/unsigned bounds independently; this provides 6159 * the greatest amount of precision when alu operations are mixed with 6160 * jmp32 operations. These operations are BPF_ADD, BPF_SUB, BPF_MUL, 6161 * BPF_AND, BPF_OR and BPF_XOR. This is possible because these ops have 6162 * fairly easy to understand and calculate behavior in both 32-bit and 6163 * 64-bit alu ops. See alu32 verifier tests for examples. The second 6164 * class of operations, BPF_LSH, BPF_RSH, and BPF_ARSH, however, are not 6165 * so easy with regard to tracking sign/unsigned bounds because the bits 6166 * may cross subreg boundaries in the alu64 case. When this happens we 6167 * mark the reg unbounded in the subreg bound space and use the resulting 6168 * tnum to calculate an approximation of the sign/unsigned bounds.
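 *
 * Sketch of why the independent subreg tracking pays off (hypothetical
 * fragment, pseudo-asm syntax):
 *
 *	w1 = w2		// alu32 mov, upper 32 bits zeroed
 *	w1 += 10	// alu32 add: u32/s32 bounds tracked directly
 *	if w1 < 100 goto ...	// jmp32 compare refines the same u32 bounds
 *
 * Because BPF_ADD is in the first class, the verifier keeps a tight u32
 * interval through the add and can reuse it directly at the jmp32 test,
 * without reconstructing it from the 64-bit bounds or the tnum.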
6169 */ 6170 switch (opcode) { 6171 case BPF_ADD: 6172 ret = sanitize_val_alu(env, insn); 6173 if (ret < 0) { 6174 verbose(env, "R%d tried to add from different pointers or scalars\n", dst); 6175 return ret; 6176 } 6177 scalar32_min_max_add(dst_reg, &src_reg); 6178 scalar_min_max_add(dst_reg, &src_reg); 6179 dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off); 6180 break; 6181 case BPF_SUB: 6182 ret = sanitize_val_alu(env, insn); 6183 if (ret < 0) { 6184 verbose(env, "R%d tried to sub from different pointers or scalars\n", dst); 6185 return ret; 6186 } 6187 scalar32_min_max_sub(dst_reg, &src_reg); 6188 scalar_min_max_sub(dst_reg, &src_reg); 6189 dst_reg->var_off = tnum_sub(dst_reg->var_off, src_reg.var_off); 6190 break; 6191 case BPF_MUL: 6192 dst_reg->var_off = tnum_mul(dst_reg->var_off, src_reg.var_off); 6193 scalar32_min_max_mul(dst_reg, &src_reg); 6194 scalar_min_max_mul(dst_reg, &src_reg); 6195 break; 6196 case BPF_AND: 6197 dst_reg->var_off = tnum_and(dst_reg->var_off, src_reg.var_off); 6198 scalar32_min_max_and(dst_reg, &src_reg); 6199 scalar_min_max_and(dst_reg, &src_reg); 6200 break; 6201 case BPF_OR: 6202 dst_reg->var_off = tnum_or(dst_reg->var_off, src_reg.var_off); 6203 scalar32_min_max_or(dst_reg, &src_reg); 6204 scalar_min_max_or(dst_reg, &src_reg); 6205 break; 6206 case BPF_XOR: 6207 dst_reg->var_off = tnum_xor(dst_reg->var_off, src_reg.var_off); 6208 scalar32_min_max_xor(dst_reg, &src_reg); 6209 scalar_min_max_xor(dst_reg, &src_reg); 6210 break; 6211 case BPF_LSH: 6212 if (umax_val >= insn_bitness) { 6213 /* Shifts greater than 31 or 63 are undefined. 6214 * This includes shifts by a negative number. 6215 */ 6216 mark_reg_unknown(env, regs, insn->dst_reg); 6217 break; 6218 } 6219 if (alu32) 6220 scalar32_min_max_lsh(dst_reg, &src_reg); 6221 else 6222 scalar_min_max_lsh(dst_reg, &src_reg); 6223 break; 6224 case BPF_RSH: 6225 if (umax_val >= insn_bitness) { 6226 /* Shifts greater than 31 or 63 are undefined. 6227 * This includes shifts by a negative number. 6228 */ 6229 mark_reg_unknown(env, regs, insn->dst_reg); 6230 break; 6231 } 6232 if (alu32) 6233 scalar32_min_max_rsh(dst_reg, &src_reg); 6234 else 6235 scalar_min_max_rsh(dst_reg, &src_reg); 6236 break; 6237 case BPF_ARSH: 6238 if (umax_val >= insn_bitness) { 6239 /* Shifts greater than 31 or 63 are undefined. 6240 * This includes shifts by a negative number. 6241 */ 6242 mark_reg_unknown(env, regs, insn->dst_reg); 6243 break; 6244 } 6245 if (alu32) 6246 scalar32_min_max_arsh(dst_reg, &src_reg); 6247 else 6248 scalar_min_max_arsh(dst_reg, &src_reg); 6249 break; 6250 default: 6251 mark_reg_unknown(env, regs, insn->dst_reg); 6252 break; 6253 } 6254 6255 /* ALU32 ops are zero extended into 64bit register */ 6256 if (alu32) 6257 zext_32_to_64(dst_reg); 6258 6259 __update_reg_bounds(dst_reg); 6260 __reg_deduce_bounds(dst_reg); 6261 __reg_bound_offset(dst_reg); 6262 return 0; 6263 } 6264 6265 /* Handles ALU ops other than BPF_END, BPF_NEG and BPF_MOV: computes new min/max 6266 * and var_off. 
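 *
 * Typical cases dispatched below (illustrative, register numbers
 * arbitrary):
 *
 *	r1 += r2	// scalar  += scalar  -> adjust_scalar_min_max_vals()
 *	r3 += r2	// pointer += scalar  -> adjust_ptr_min_max_vals()
 *	r2 += r3	// scalar  += pointer -> same helper, operands swapped
 *	r3 -= r4	// pointer -  pointer -> unknown scalar, privileged only
 *
 * BPF_K forms take the same paths: the immediate is materialized as a
 * known-constant scalar (off_reg) so the helpers never need a special
 * case for it.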
6267 */ 6268 static int adjust_reg_min_max_vals(struct bpf_verifier_env *env, 6269 struct bpf_insn *insn) 6270 { 6271 struct bpf_verifier_state *vstate = env->cur_state; 6272 struct bpf_func_state *state = vstate->frame[vstate->curframe]; 6273 struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg; 6274 struct bpf_reg_state *ptr_reg = NULL, off_reg = {0}; 6275 u8 opcode = BPF_OP(insn->code); 6276 int err; 6277 6278 dst_reg = ®s[insn->dst_reg]; 6279 src_reg = NULL; 6280 if (dst_reg->type != SCALAR_VALUE) 6281 ptr_reg = dst_reg; 6282 if (BPF_SRC(insn->code) == BPF_X) { 6283 src_reg = ®s[insn->src_reg]; 6284 if (src_reg->type != SCALAR_VALUE) { 6285 if (dst_reg->type != SCALAR_VALUE) { 6286 /* Combining two pointers by any ALU op yields 6287 * an arbitrary scalar. Disallow all math except 6288 * pointer subtraction 6289 */ 6290 if (opcode == BPF_SUB && env->allow_ptr_leaks) { 6291 mark_reg_unknown(env, regs, insn->dst_reg); 6292 return 0; 6293 } 6294 verbose(env, "R%d pointer %s pointer prohibited\n", 6295 insn->dst_reg, 6296 bpf_alu_string[opcode >> 4]); 6297 return -EACCES; 6298 } else { 6299 /* scalar += pointer 6300 * This is legal, but we have to reverse our 6301 * src/dest handling in computing the range 6302 */ 6303 err = mark_chain_precision(env, insn->dst_reg); 6304 if (err) 6305 return err; 6306 return adjust_ptr_min_max_vals(env, insn, 6307 src_reg, dst_reg); 6308 } 6309 } else if (ptr_reg) { 6310 /* pointer += scalar */ 6311 err = mark_chain_precision(env, insn->src_reg); 6312 if (err) 6313 return err; 6314 return adjust_ptr_min_max_vals(env, insn, 6315 dst_reg, src_reg); 6316 } 6317 } else { 6318 /* Pretend the src is a reg with a known value, since we only 6319 * need to be able to read from this state. 6320 */ 6321 off_reg.type = SCALAR_VALUE; 6322 __mark_reg_known(&off_reg, insn->imm); 6323 src_reg = &off_reg; 6324 if (ptr_reg) /* pointer += K */ 6325 return adjust_ptr_min_max_vals(env, insn, 6326 ptr_reg, src_reg); 6327 } 6328 6329 /* Got here implies adding two SCALAR_VALUEs */ 6330 if (WARN_ON_ONCE(ptr_reg)) { 6331 print_verifier_state(env, state); 6332 verbose(env, "verifier internal error: unexpected ptr_reg\n"); 6333 return -EINVAL; 6334 } 6335 if (WARN_ON(!src_reg)) { 6336 print_verifier_state(env, state); 6337 verbose(env, "verifier internal error: no src_reg\n"); 6338 return -EINVAL; 6339 } 6340 return adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg); 6341 } 6342 6343 /* check validity of 32-bit and 64-bit arithmetic operations */ 6344 static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) 6345 { 6346 struct bpf_reg_state *regs = cur_regs(env); 6347 u8 opcode = BPF_OP(insn->code); 6348 int err; 6349 6350 if (opcode == BPF_END || opcode == BPF_NEG) { 6351 if (opcode == BPF_NEG) { 6352 if (BPF_SRC(insn->code) != 0 || 6353 insn->src_reg != BPF_REG_0 || 6354 insn->off != 0 || insn->imm != 0) { 6355 verbose(env, "BPF_NEG uses reserved fields\n"); 6356 return -EINVAL; 6357 } 6358 } else { 6359 if (insn->src_reg != BPF_REG_0 || insn->off != 0 || 6360 (insn->imm != 16 && insn->imm != 32 && insn->imm != 64) || 6361 BPF_CLASS(insn->code) == BPF_ALU64) { 6362 verbose(env, "BPF_END uses reserved fields\n"); 6363 return -EINVAL; 6364 } 6365 } 6366 6367 /* check src operand */ 6368 err = check_reg_arg(env, insn->dst_reg, SRC_OP); 6369 if (err) 6370 return err; 6371 6372 if (is_pointer_value(env, insn->dst_reg)) { 6373 verbose(env, "R%d pointer arithmetic prohibited\n", 6374 insn->dst_reg); 6375 return -EACCES; 6376 } 6377 6378 /* check dest 
operand */ 6379 err = check_reg_arg(env, insn->dst_reg, DST_OP); 6380 if (err) 6381 return err; 6382 6383 } else if (opcode == BPF_MOV) { 6384 6385 if (BPF_SRC(insn->code) == BPF_X) { 6386 if (insn->imm != 0 || insn->off != 0) { 6387 verbose(env, "BPF_MOV uses reserved fields\n"); 6388 return -EINVAL; 6389 } 6390 6391 /* check src operand */ 6392 err = check_reg_arg(env, insn->src_reg, SRC_OP); 6393 if (err) 6394 return err; 6395 } else { 6396 if (insn->src_reg != BPF_REG_0 || insn->off != 0) { 6397 verbose(env, "BPF_MOV uses reserved fields\n"); 6398 return -EINVAL; 6399 } 6400 } 6401 6402 /* check dest operand, mark as required later */ 6403 err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK); 6404 if (err) 6405 return err; 6406 6407 if (BPF_SRC(insn->code) == BPF_X) { 6408 struct bpf_reg_state *src_reg = regs + insn->src_reg; 6409 struct bpf_reg_state *dst_reg = regs + insn->dst_reg; 6410 6411 if (BPF_CLASS(insn->code) == BPF_ALU64) { 6412 /* case: R1 = R2 6413 * copy register state to dest reg 6414 */ 6415 *dst_reg = *src_reg; 6416 dst_reg->live |= REG_LIVE_WRITTEN; 6417 dst_reg->subreg_def = DEF_NOT_SUBREG; 6418 } else { 6419 /* R1 = (u32) R2 */ 6420 if (is_pointer_value(env, insn->src_reg)) { 6421 verbose(env, 6422 "R%d partial copy of pointer\n", 6423 insn->src_reg); 6424 return -EACCES; 6425 } else if (src_reg->type == SCALAR_VALUE) { 6426 *dst_reg = *src_reg; 6427 dst_reg->live |= REG_LIVE_WRITTEN; 6428 dst_reg->subreg_def = env->insn_idx + 1; 6429 } else { 6430 mark_reg_unknown(env, regs, 6431 insn->dst_reg); 6432 } 6433 zext_32_to_64(dst_reg); 6434 } 6435 } else { 6436 /* case: R = imm 6437 * remember the value we stored into this reg 6438 */ 6439 /* clear any state __mark_reg_known doesn't set */ 6440 mark_reg_unknown(env, regs, insn->dst_reg); 6441 regs[insn->dst_reg].type = SCALAR_VALUE; 6442 if (BPF_CLASS(insn->code) == BPF_ALU64) { 6443 __mark_reg_known(regs + insn->dst_reg, 6444 insn->imm); 6445 } else { 6446 __mark_reg_known(regs + insn->dst_reg, 6447 (u32)insn->imm); 6448 } 6449 } 6450 6451 } else if (opcode > BPF_END) { 6452 verbose(env, "invalid BPF_ALU opcode %x\n", opcode); 6453 return -EINVAL; 6454 6455 } else { /* all other ALU ops: and, sub, xor, add, ... */ 6456 6457 if (BPF_SRC(insn->code) == BPF_X) { 6458 if (insn->imm != 0 || insn->off != 0) { 6459 verbose(env, "BPF_ALU uses reserved fields\n"); 6460 return -EINVAL; 6461 } 6462 /* check src1 operand */ 6463 err = check_reg_arg(env, insn->src_reg, SRC_OP); 6464 if (err) 6465 return err; 6466 } else { 6467 if (insn->src_reg != BPF_REG_0 || insn->off != 0) { 6468 verbose(env, "BPF_ALU uses reserved fields\n"); 6469 return -EINVAL; 6470 } 6471 } 6472 6473 /* check src2 operand */ 6474 err = check_reg_arg(env, insn->dst_reg, SRC_OP); 6475 if (err) 6476 return err; 6477 6478 if ((opcode == BPF_MOD || opcode == BPF_DIV) && 6479 BPF_SRC(insn->code) == BPF_K && insn->imm == 0) { 6480 verbose(env, "div by zero\n"); 6481 return -EINVAL; 6482 } 6483 6484 if ((opcode == BPF_LSH || opcode == BPF_RSH || 6485 opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) { 6486 int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 
64 : 32; 6487 6488 if (insn->imm < 0 || insn->imm >= size) { 6489 verbose(env, "invalid shift %d\n", insn->imm); 6490 return -EINVAL; 6491 } 6492 } 6493 6494 /* check dest operand */ 6495 err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK); 6496 if (err) 6497 return err; 6498 6499 return adjust_reg_min_max_vals(env, insn); 6500 } 6501 6502 return 0; 6503 } 6504 6505 static void __find_good_pkt_pointers(struct bpf_func_state *state, 6506 struct bpf_reg_state *dst_reg, 6507 enum bpf_reg_type type, u16 new_range) 6508 { 6509 struct bpf_reg_state *reg; 6510 int i; 6511 6512 for (i = 0; i < MAX_BPF_REG; i++) { 6513 reg = &state->regs[i]; 6514 if (reg->type == type && reg->id == dst_reg->id) 6515 /* keep the maximum range already checked */ 6516 reg->range = max(reg->range, new_range); 6517 } 6518 6519 bpf_for_each_spilled_reg(i, state, reg) { 6520 if (!reg) 6521 continue; 6522 if (reg->type == type && reg->id == dst_reg->id) 6523 reg->range = max(reg->range, new_range); 6524 } 6525 } 6526 6527 static void find_good_pkt_pointers(struct bpf_verifier_state *vstate, 6528 struct bpf_reg_state *dst_reg, 6529 enum bpf_reg_type type, 6530 bool range_right_open) 6531 { 6532 u16 new_range; 6533 int i; 6534 6535 if (dst_reg->off < 0 || 6536 (dst_reg->off == 0 && range_right_open)) 6537 /* This doesn't give us any range */ 6538 return; 6539 6540 if (dst_reg->umax_value > MAX_PACKET_OFF || 6541 dst_reg->umax_value + dst_reg->off > MAX_PACKET_OFF) 6542 /* Risk of overflow. For instance, ptr + (1<<63) may be less 6543 * than pkt_end, but that's because it's also less than pkt. 6544 */ 6545 return; 6546 6547 new_range = dst_reg->off; 6548 if (range_right_open) 6549 new_range--; 6550 6551 /* Examples for register markings: 6552 * 6553 * pkt_data in dst register: 6554 * 6555 * r2 = r3; 6556 * r2 += 8; 6557 * if (r2 > pkt_end) goto <handle exception> 6558 * <access okay> 6559 * 6560 * r2 = r3; 6561 * r2 += 8; 6562 * if (r2 < pkt_end) goto <access okay> 6563 * <handle exception> 6564 * 6565 * Where: 6566 * r2 == dst_reg, pkt_end == src_reg 6567 * r2=pkt(id=n,off=8,r=0) 6568 * r3=pkt(id=n,off=0,r=0) 6569 * 6570 * pkt_data in src register: 6571 * 6572 * r2 = r3; 6573 * r2 += 8; 6574 * if (pkt_end >= r2) goto <access okay> 6575 * <handle exception> 6576 * 6577 * r2 = r3; 6578 * r2 += 8; 6579 * if (pkt_end <= r2) goto <handle exception> 6580 * <access okay> 6581 * 6582 * Where: 6583 * pkt_end == dst_reg, r2 == src_reg 6584 * r2=pkt(id=n,off=8,r=0) 6585 * r3=pkt(id=n,off=0,r=0) 6586 * 6587 * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8) 6588 * or r3=pkt(id=n,off=0,r=8-1), so that range of bytes [r3, r3 + 8) 6589 * and [r3, r3 + 8-1) respectively is safe to access depending on 6590 * the check. 6591 */ 6592 6593 /* If our ids match, then we must have the same max_value. And we 6594 * don't care about the other reg's fixed offset, since if it's too big 6595 * the range won't allow anything. 6596 * dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16. 
6597 */ 6598 for (i = 0; i <= vstate->curframe; i++) 6599 __find_good_pkt_pointers(vstate->frame[i], dst_reg, type, 6600 new_range); 6601 } 6602 6603 static int is_branch32_taken(struct bpf_reg_state *reg, u32 val, u8 opcode) 6604 { 6605 struct tnum subreg = tnum_subreg(reg->var_off); 6606 s32 sval = (s32)val; 6607 6608 switch (opcode) { 6609 case BPF_JEQ: 6610 if (tnum_is_const(subreg)) 6611 return !!tnum_equals_const(subreg, val); 6612 break; 6613 case BPF_JNE: 6614 if (tnum_is_const(subreg)) 6615 return !tnum_equals_const(subreg, val); 6616 break; 6617 case BPF_JSET: 6618 if ((~subreg.mask & subreg.value) & val) 6619 return 1; 6620 if (!((subreg.mask | subreg.value) & val)) 6621 return 0; 6622 break; 6623 case BPF_JGT: 6624 if (reg->u32_min_value > val) 6625 return 1; 6626 else if (reg->u32_max_value <= val) 6627 return 0; 6628 break; 6629 case BPF_JSGT: 6630 if (reg->s32_min_value > sval) 6631 return 1; 6632 else if (reg->s32_max_value < sval) 6633 return 0; 6634 break; 6635 case BPF_JLT: 6636 if (reg->u32_max_value < val) 6637 return 1; 6638 else if (reg->u32_min_value >= val) 6639 return 0; 6640 break; 6641 case BPF_JSLT: 6642 if (reg->s32_max_value < sval) 6643 return 1; 6644 else if (reg->s32_min_value >= sval) 6645 return 0; 6646 break; 6647 case BPF_JGE: 6648 if (reg->u32_min_value >= val) 6649 return 1; 6650 else if (reg->u32_max_value < val) 6651 return 0; 6652 break; 6653 case BPF_JSGE: 6654 if (reg->s32_min_value >= sval) 6655 return 1; 6656 else if (reg->s32_max_value < sval) 6657 return 0; 6658 break; 6659 case BPF_JLE: 6660 if (reg->u32_max_value <= val) 6661 return 1; 6662 else if (reg->u32_min_value > val) 6663 return 0; 6664 break; 6665 case BPF_JSLE: 6666 if (reg->s32_max_value <= sval) 6667 return 1; 6668 else if (reg->s32_min_value > sval) 6669 return 0; 6670 break; 6671 } 6672 6673 return -1; 6674 } 6675 6676 6677 static int is_branch64_taken(struct bpf_reg_state *reg, u64 val, u8 opcode) 6678 { 6679 s64 sval = (s64)val; 6680 6681 switch (opcode) { 6682 case BPF_JEQ: 6683 if (tnum_is_const(reg->var_off)) 6684 return !!tnum_equals_const(reg->var_off, val); 6685 break; 6686 case BPF_JNE: 6687 if (tnum_is_const(reg->var_off)) 6688 return !tnum_equals_const(reg->var_off, val); 6689 break; 6690 case BPF_JSET: 6691 if ((~reg->var_off.mask & reg->var_off.value) & val) 6692 return 1; 6693 if (!((reg->var_off.mask | reg->var_off.value) & val)) 6694 return 0; 6695 break; 6696 case BPF_JGT: 6697 if (reg->umin_value > val) 6698 return 1; 6699 else if (reg->umax_value <= val) 6700 return 0; 6701 break; 6702 case BPF_JSGT: 6703 if (reg->smin_value > sval) 6704 return 1; 6705 else if (reg->smax_value < sval) 6706 return 0; 6707 break; 6708 case BPF_JLT: 6709 if (reg->umax_value < val) 6710 return 1; 6711 else if (reg->umin_value >= val) 6712 return 0; 6713 break; 6714 case BPF_JSLT: 6715 if (reg->smax_value < sval) 6716 return 1; 6717 else if (reg->smin_value >= sval) 6718 return 0; 6719 break; 6720 case BPF_JGE: 6721 if (reg->umin_value >= val) 6722 return 1; 6723 else if (reg->umax_value < val) 6724 return 0; 6725 break; 6726 case BPF_JSGE: 6727 if (reg->smin_value >= sval) 6728 return 1; 6729 else if (reg->smax_value < sval) 6730 return 0; 6731 break; 6732 case BPF_JLE: 6733 if (reg->umax_value <= val) 6734 return 1; 6735 else if (reg->umin_value > val) 6736 return 0; 6737 break; 6738 case BPF_JSLE: 6739 if (reg->smax_value <= sval) 6740 return 1; 6741 else if (reg->smin_value > sval) 6742 return 0; 6743 break; 6744 } 6745 6746 return -1; 6747 } 6748 6749 /* compute branch 
direction of the expression "if (reg opcode val) goto target;" 6750 * and return: 6751 * 1 - branch will be taken and "goto target" will be executed 6752 * 0 - branch will not be taken and fall-through to next insn 6753 * -1 - unknown. Example: "if (reg < 5)" is unknown when register value 6754 * range [0,10] 6755 */ 6756 static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode, 6757 bool is_jmp32) 6758 { 6759 if (__is_pointer_value(false, reg)) { 6760 if (!reg_type_not_null(reg->type)) 6761 return -1; 6762 6763 /* If pointer is valid tests against zero will fail so we can 6764 * use this to direct branch taken. 6765 */ 6766 if (val != 0) 6767 return -1; 6768 6769 switch (opcode) { 6770 case BPF_JEQ: 6771 return 0; 6772 case BPF_JNE: 6773 return 1; 6774 default: 6775 return -1; 6776 } 6777 } 6778 6779 if (is_jmp32) 6780 return is_branch32_taken(reg, val, opcode); 6781 return is_branch64_taken(reg, val, opcode); 6782 } 6783 6784 /* Adjusts the register min/max values in the case that the dst_reg is the 6785 * variable register that we are working on, and src_reg is a constant or we're 6786 * simply doing a BPF_K check. 6787 * In JEQ/JNE cases we also adjust the var_off values. 6788 */ 6789 static void reg_set_min_max(struct bpf_reg_state *true_reg, 6790 struct bpf_reg_state *false_reg, 6791 u64 val, u32 val32, 6792 u8 opcode, bool is_jmp32) 6793 { 6794 struct tnum false_32off = tnum_subreg(false_reg->var_off); 6795 struct tnum false_64off = false_reg->var_off; 6796 struct tnum true_32off = tnum_subreg(true_reg->var_off); 6797 struct tnum true_64off = true_reg->var_off; 6798 s64 sval = (s64)val; 6799 s32 sval32 = (s32)val32; 6800 6801 /* If the dst_reg is a pointer, we can't learn anything about its 6802 * variable offset from the compare (unless src_reg were a pointer into 6803 * the same object, but we don't bother with that. 6804 * Since false_reg and true_reg have the same type by construction, we 6805 * only need to check one of them for pointerness. 6806 */ 6807 if (__is_pointer_value(false, false_reg)) 6808 return; 6809 6810 switch (opcode) { 6811 case BPF_JEQ: 6812 case BPF_JNE: 6813 { 6814 struct bpf_reg_state *reg = 6815 opcode == BPF_JEQ ? true_reg : false_reg; 6816 6817 /* For BPF_JEQ, if this is false we know nothing Jon Snow, but 6818 * if it is true we know the value for sure. Likewise for 6819 * BPF_JNE. 6820 */ 6821 if (is_jmp32) 6822 __mark_reg32_known(reg, val32); 6823 else 6824 __mark_reg_known(reg, val); 6825 break; 6826 } 6827 case BPF_JSET: 6828 if (is_jmp32) { 6829 false_32off = tnum_and(false_32off, tnum_const(~val32)); 6830 if (is_power_of_2(val32)) 6831 true_32off = tnum_or(true_32off, 6832 tnum_const(val32)); 6833 } else { 6834 false_64off = tnum_and(false_64off, tnum_const(~val)); 6835 if (is_power_of_2(val)) 6836 true_64off = tnum_or(true_64off, 6837 tnum_const(val)); 6838 } 6839 break; 6840 case BPF_JGE: 6841 case BPF_JGT: 6842 { 6843 if (is_jmp32) { 6844 u32 false_umax = opcode == BPF_JGT ? val32 : val32 - 1; 6845 u32 true_umin = opcode == BPF_JGT ? val32 + 1 : val32; 6846 6847 false_reg->u32_max_value = min(false_reg->u32_max_value, 6848 false_umax); 6849 true_reg->u32_min_value = max(true_reg->u32_min_value, 6850 true_umin); 6851 } else { 6852 u64 false_umax = opcode == BPF_JGT ? val : val - 1; 6853 u64 true_umin = opcode == BPF_JGT ? 
val + 1 : val; 6854 6855 false_reg->umax_value = min(false_reg->umax_value, false_umax); 6856 true_reg->umin_value = max(true_reg->umin_value, true_umin); 6857 } 6858 break; 6859 } 6860 case BPF_JSGE: 6861 case BPF_JSGT: 6862 { 6863 if (is_jmp32) { 6864 s32 false_smax = opcode == BPF_JSGT ? sval32 : sval32 - 1; 6865 s32 true_smin = opcode == BPF_JSGT ? sval32 + 1 : sval32; 6866 6867 false_reg->s32_max_value = min(false_reg->s32_max_value, false_smax); 6868 true_reg->s32_min_value = max(true_reg->s32_min_value, true_smin); 6869 } else { 6870 s64 false_smax = opcode == BPF_JSGT ? sval : sval - 1; 6871 s64 true_smin = opcode == BPF_JSGT ? sval + 1 : sval; 6872 6873 false_reg->smax_value = min(false_reg->smax_value, false_smax); 6874 true_reg->smin_value = max(true_reg->smin_value, true_smin); 6875 } 6876 break; 6877 } 6878 case BPF_JLE: 6879 case BPF_JLT: 6880 { 6881 if (is_jmp32) { 6882 u32 false_umin = opcode == BPF_JLT ? val32 : val32 + 1; 6883 u32 true_umax = opcode == BPF_JLT ? val32 - 1 : val32; 6884 6885 false_reg->u32_min_value = max(false_reg->u32_min_value, 6886 false_umin); 6887 true_reg->u32_max_value = min(true_reg->u32_max_value, 6888 true_umax); 6889 } else { 6890 u64 false_umin = opcode == BPF_JLT ? val : val + 1; 6891 u64 true_umax = opcode == BPF_JLT ? val - 1 : val; 6892 6893 false_reg->umin_value = max(false_reg->umin_value, false_umin); 6894 true_reg->umax_value = min(true_reg->umax_value, true_umax); 6895 } 6896 break; 6897 } 6898 case BPF_JSLE: 6899 case BPF_JSLT: 6900 { 6901 if (is_jmp32) { 6902 s32 false_smin = opcode == BPF_JSLT ? sval32 : sval32 + 1; 6903 s32 true_smax = opcode == BPF_JSLT ? sval32 - 1 : sval32; 6904 6905 false_reg->s32_min_value = max(false_reg->s32_min_value, false_smin); 6906 true_reg->s32_max_value = min(true_reg->s32_max_value, true_smax); 6907 } else { 6908 s64 false_smin = opcode == BPF_JSLT ? sval : sval + 1; 6909 s64 true_smax = opcode == BPF_JSLT ? sval - 1 : sval; 6910 6911 false_reg->smin_value = max(false_reg->smin_value, false_smin); 6912 true_reg->smax_value = min(true_reg->smax_value, true_smax); 6913 } 6914 break; 6915 } 6916 default: 6917 return; 6918 } 6919 6920 if (is_jmp32) { 6921 false_reg->var_off = tnum_or(tnum_clear_subreg(false_64off), 6922 tnum_subreg(false_32off)); 6923 true_reg->var_off = tnum_or(tnum_clear_subreg(true_64off), 6924 tnum_subreg(true_32off)); 6925 __reg_combine_32_into_64(false_reg); 6926 __reg_combine_32_into_64(true_reg); 6927 } else { 6928 false_reg->var_off = false_64off; 6929 true_reg->var_off = true_64off; 6930 __reg_combine_64_into_32(false_reg); 6931 __reg_combine_64_into_32(true_reg); 6932 } 6933 } 6934 6935 /* Same as above, but for the case that dst_reg holds a constant and src_reg is 6936 * the variable reg. 6937 */ 6938 static void reg_set_min_max_inv(struct bpf_reg_state *true_reg, 6939 struct bpf_reg_state *false_reg, 6940 u64 val, u32 val32, 6941 u8 opcode, bool is_jmp32) 6942 { 6943 /* How can we transform "a <op> b" into "b <op> a"? 
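 *
 * For example, "if 5 s< r1 goto pc+N" carries exactly the same
 * information as "if r1 s> 5 goto pc+N", so flipping BPF_JSLT to
 * BPF_JSGT lets reg_set_min_max() run with the constant on its usual
 * side. The table below encodes that mapping:
 *
 *	JGT <-> JLT, JGE <-> JLE, JSGT <-> JSLT, JSGE <-> JSLE
 *	JEQ, JNE and JSET are symmetric and map to themselves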
*/ 6944 static const u8 opcode_flip[16] = { 6945 /* these stay the same */ 6946 [BPF_JEQ >> 4] = BPF_JEQ, 6947 [BPF_JNE >> 4] = BPF_JNE, 6948 [BPF_JSET >> 4] = BPF_JSET, 6949 /* these swap "lesser" and "greater" (L and G in the opcodes) */ 6950 [BPF_JGE >> 4] = BPF_JLE, 6951 [BPF_JGT >> 4] = BPF_JLT, 6952 [BPF_JLE >> 4] = BPF_JGE, 6953 [BPF_JLT >> 4] = BPF_JGT, 6954 [BPF_JSGE >> 4] = BPF_JSLE, 6955 [BPF_JSGT >> 4] = BPF_JSLT, 6956 [BPF_JSLE >> 4] = BPF_JSGE, 6957 [BPF_JSLT >> 4] = BPF_JSGT 6958 }; 6959 opcode = opcode_flip[opcode >> 4]; 6960 /* This uses zero as "not present in table"; luckily the zero opcode, 6961 * BPF_JA, can't get here. 6962 */ 6963 if (opcode) 6964 reg_set_min_max(true_reg, false_reg, val, val32, opcode, is_jmp32); 6965 } 6966 6967 /* Regs are known to be equal, so intersect their min/max/var_off */ 6968 static void __reg_combine_min_max(struct bpf_reg_state *src_reg, 6969 struct bpf_reg_state *dst_reg) 6970 { 6971 src_reg->umin_value = dst_reg->umin_value = max(src_reg->umin_value, 6972 dst_reg->umin_value); 6973 src_reg->umax_value = dst_reg->umax_value = min(src_reg->umax_value, 6974 dst_reg->umax_value); 6975 src_reg->smin_value = dst_reg->smin_value = max(src_reg->smin_value, 6976 dst_reg->smin_value); 6977 src_reg->smax_value = dst_reg->smax_value = min(src_reg->smax_value, 6978 dst_reg->smax_value); 6979 src_reg->var_off = dst_reg->var_off = tnum_intersect(src_reg->var_off, 6980 dst_reg->var_off); 6981 /* We might have learned new bounds from the var_off. */ 6982 __update_reg_bounds(src_reg); 6983 __update_reg_bounds(dst_reg); 6984 /* We might have learned something about the sign bit. */ 6985 __reg_deduce_bounds(src_reg); 6986 __reg_deduce_bounds(dst_reg); 6987 /* We might have learned some bits from the bounds. */ 6988 __reg_bound_offset(src_reg); 6989 __reg_bound_offset(dst_reg); 6990 /* Intersecting with the old var_off might have improved our bounds 6991 * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc), 6992 * then new var_off is (0; 0x7f...fc) which improves our umax. 6993 */ 6994 __update_reg_bounds(src_reg); 6995 __update_reg_bounds(dst_reg); 6996 } 6997 6998 static void reg_combine_min_max(struct bpf_reg_state *true_src, 6999 struct bpf_reg_state *true_dst, 7000 struct bpf_reg_state *false_src, 7001 struct bpf_reg_state *false_dst, 7002 u8 opcode) 7003 { 7004 switch (opcode) { 7005 case BPF_JEQ: 7006 __reg_combine_min_max(true_src, true_dst); 7007 break; 7008 case BPF_JNE: 7009 __reg_combine_min_max(false_src, false_dst); 7010 break; 7011 } 7012 } 7013 7014 static void mark_ptr_or_null_reg(struct bpf_func_state *state, 7015 struct bpf_reg_state *reg, u32 id, 7016 bool is_null) 7017 { 7018 if (reg_type_may_be_null(reg->type) && reg->id == id) { 7019 /* Old offset (both fixed and variable parts) should 7020 * have been known-zero, because we don't allow pointer 7021 * arithmetic on pointers that might be NULL. 
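 *
 * That invariant is enforced earlier in adjust_ptr_min_max_vals(): e.g.
 * a hypothetical
 *
 *	r1 = <PTR_TO_SOCKET_OR_NULL>
 *	r1 += 8
 *
 * is rejected with "pointer arithmetic on ... prohibited", so by the
 * time the NULL check is processed here, off and var_off can only be
 * zero and the WARN_ON_ONCE below should never fire.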
7022 */ 7023 if (WARN_ON_ONCE(reg->smin_value || reg->smax_value || 7024 !tnum_equals_const(reg->var_off, 0) || 7025 reg->off)) { 7026 __mark_reg_known_zero(reg); 7027 reg->off = 0; 7028 } 7029 if (is_null) { 7030 reg->type = SCALAR_VALUE; 7031 } else if (reg->type == PTR_TO_MAP_VALUE_OR_NULL) { 7032 const struct bpf_map *map = reg->map_ptr; 7033 7034 if (map->inner_map_meta) { 7035 reg->type = CONST_PTR_TO_MAP; 7036 reg->map_ptr = map->inner_map_meta; 7037 } else if (map->map_type == BPF_MAP_TYPE_XSKMAP) { 7038 reg->type = PTR_TO_XDP_SOCK; 7039 } else if (map->map_type == BPF_MAP_TYPE_SOCKMAP || 7040 map->map_type == BPF_MAP_TYPE_SOCKHASH) { 7041 reg->type = PTR_TO_SOCKET; 7042 } else { 7043 reg->type = PTR_TO_MAP_VALUE; 7044 } 7045 } else if (reg->type == PTR_TO_SOCKET_OR_NULL) { 7046 reg->type = PTR_TO_SOCKET; 7047 } else if (reg->type == PTR_TO_SOCK_COMMON_OR_NULL) { 7048 reg->type = PTR_TO_SOCK_COMMON; 7049 } else if (reg->type == PTR_TO_TCP_SOCK_OR_NULL) { 7050 reg->type = PTR_TO_TCP_SOCK; 7051 } else if (reg->type == PTR_TO_BTF_ID_OR_NULL) { 7052 reg->type = PTR_TO_BTF_ID; 7053 } else if (reg->type == PTR_TO_MEM_OR_NULL) { 7054 reg->type = PTR_TO_MEM; 7055 } else if (reg->type == PTR_TO_RDONLY_BUF_OR_NULL) { 7056 reg->type = PTR_TO_RDONLY_BUF; 7057 } else if (reg->type == PTR_TO_RDWR_BUF_OR_NULL) { 7058 reg->type = PTR_TO_RDWR_BUF; 7059 } 7060 if (is_null) { 7061 /* We don't need id and ref_obj_id from this point 7062 * onwards anymore, thus we should better reset it, 7063 * so that state pruning has chances to take effect. 7064 */ 7065 reg->id = 0; 7066 reg->ref_obj_id = 0; 7067 } else if (!reg_may_point_to_spin_lock(reg)) { 7068 /* For not-NULL ptr, reg->ref_obj_id will be reset 7069 * in release_reg_references(). 7070 * 7071 * reg->id is still used by spin_lock ptr. Other 7072 * than spin_lock ptr type, reg->id can be reset. 7073 */ 7074 reg->id = 0; 7075 } 7076 } 7077 } 7078 7079 static void __mark_ptr_or_null_regs(struct bpf_func_state *state, u32 id, 7080 bool is_null) 7081 { 7082 struct bpf_reg_state *reg; 7083 int i; 7084 7085 for (i = 0; i < MAX_BPF_REG; i++) 7086 mark_ptr_or_null_reg(state, &state->regs[i], id, is_null); 7087 7088 bpf_for_each_spilled_reg(i, state, reg) { 7089 if (!reg) 7090 continue; 7091 mark_ptr_or_null_reg(state, reg, id, is_null); 7092 } 7093 } 7094 7095 /* The logic is similar to find_good_pkt_pointers(), both could eventually 7096 * be folded together at some point. 7097 */ 7098 static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno, 7099 bool is_null) 7100 { 7101 struct bpf_func_state *state = vstate->frame[vstate->curframe]; 7102 struct bpf_reg_state *regs = state->regs; 7103 u32 ref_obj_id = regs[regno].ref_obj_id; 7104 u32 id = regs[regno].id; 7105 int i; 7106 7107 if (ref_obj_id && ref_obj_id == id && is_null) 7108 /* regs[regno] is in the " == NULL" branch. 7109 * No one could have freed the reference state before 7110 * doing the NULL check. 7111 */ 7112 WARN_ON_ONCE(release_reference_state(state, id)); 7113 7114 for (i = 0; i <= vstate->curframe; i++) 7115 __mark_ptr_or_null_regs(vstate->frame[i], id, is_null); 7116 } 7117 7118 static bool try_match_pkt_pointers(const struct bpf_insn *insn, 7119 struct bpf_reg_state *dst_reg, 7120 struct bpf_reg_state *src_reg, 7121 struct bpf_verifier_state *this_branch, 7122 struct bpf_verifier_state *other_branch) 7123 { 7124 if (BPF_SRC(insn->code) != BPF_X) 7125 return false; 7126 7127 /* Pointers are always 64-bit. 
*/ 7128 if (BPF_CLASS(insn->code) == BPF_JMP32) 7129 return false; 7130 7131 switch (BPF_OP(insn->code)) { 7132 case BPF_JGT: 7133 if ((dst_reg->type == PTR_TO_PACKET && 7134 src_reg->type == PTR_TO_PACKET_END) || 7135 (dst_reg->type == PTR_TO_PACKET_META && 7136 reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) { 7137 /* pkt_data' > pkt_end, pkt_meta' > pkt_data */ 7138 find_good_pkt_pointers(this_branch, dst_reg, 7139 dst_reg->type, false); 7140 } else if ((dst_reg->type == PTR_TO_PACKET_END && 7141 src_reg->type == PTR_TO_PACKET) || 7142 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) && 7143 src_reg->type == PTR_TO_PACKET_META)) { 7144 /* pkt_end > pkt_data', pkt_data > pkt_meta' */ 7145 find_good_pkt_pointers(other_branch, src_reg, 7146 src_reg->type, true); 7147 } else { 7148 return false; 7149 } 7150 break; 7151 case BPF_JLT: 7152 if ((dst_reg->type == PTR_TO_PACKET && 7153 src_reg->type == PTR_TO_PACKET_END) || 7154 (dst_reg->type == PTR_TO_PACKET_META && 7155 reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) { 7156 /* pkt_data' < pkt_end, pkt_meta' < pkt_data */ 7157 find_good_pkt_pointers(other_branch, dst_reg, 7158 dst_reg->type, true); 7159 } else if ((dst_reg->type == PTR_TO_PACKET_END && 7160 src_reg->type == PTR_TO_PACKET) || 7161 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) && 7162 src_reg->type == PTR_TO_PACKET_META)) { 7163 /* pkt_end < pkt_data', pkt_data > pkt_meta' */ 7164 find_good_pkt_pointers(this_branch, src_reg, 7165 src_reg->type, false); 7166 } else { 7167 return false; 7168 } 7169 break; 7170 case BPF_JGE: 7171 if ((dst_reg->type == PTR_TO_PACKET && 7172 src_reg->type == PTR_TO_PACKET_END) || 7173 (dst_reg->type == PTR_TO_PACKET_META && 7174 reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) { 7175 /* pkt_data' >= pkt_end, pkt_meta' >= pkt_data */ 7176 find_good_pkt_pointers(this_branch, dst_reg, 7177 dst_reg->type, true); 7178 } else if ((dst_reg->type == PTR_TO_PACKET_END && 7179 src_reg->type == PTR_TO_PACKET) || 7180 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) && 7181 src_reg->type == PTR_TO_PACKET_META)) { 7182 /* pkt_end >= pkt_data', pkt_data >= pkt_meta' */ 7183 find_good_pkt_pointers(other_branch, src_reg, 7184 src_reg->type, false); 7185 } else { 7186 return false; 7187 } 7188 break; 7189 case BPF_JLE: 7190 if ((dst_reg->type == PTR_TO_PACKET && 7191 src_reg->type == PTR_TO_PACKET_END) || 7192 (dst_reg->type == PTR_TO_PACKET_META && 7193 reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) { 7194 /* pkt_data' <= pkt_end, pkt_meta' <= pkt_data */ 7195 find_good_pkt_pointers(other_branch, dst_reg, 7196 dst_reg->type, false); 7197 } else if ((dst_reg->type == PTR_TO_PACKET_END && 7198 src_reg->type == PTR_TO_PACKET) || 7199 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) && 7200 src_reg->type == PTR_TO_PACKET_META)) { 7201 /* pkt_end <= pkt_data', pkt_data <= pkt_meta' */ 7202 find_good_pkt_pointers(this_branch, src_reg, 7203 src_reg->type, true); 7204 } else { 7205 return false; 7206 } 7207 break; 7208 default: 7209 return false; 7210 } 7211 7212 return true; 7213 } 7214 7215 static int check_cond_jmp_op(struct bpf_verifier_env *env, 7216 struct bpf_insn *insn, int *insn_idx) 7217 { 7218 struct bpf_verifier_state *this_branch = env->cur_state; 7219 struct bpf_verifier_state *other_branch; 7220 struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs; 7221 struct bpf_reg_state *dst_reg, *other_branch_regs, *src_reg = NULL; 7222 u8 opcode = BPF_OP(insn->code); 7223 bool is_jmp32; 7224 int pred = -1; 7225 int err; 7226 7227 /* 
Only conditional jumps are expected to reach here. */ 7228 if (opcode == BPF_JA || opcode > BPF_JSLE) { 7229 verbose(env, "invalid BPF_JMP/JMP32 opcode %x\n", opcode); 7230 return -EINVAL; 7231 } 7232 7233 if (BPF_SRC(insn->code) == BPF_X) { 7234 if (insn->imm != 0) { 7235 verbose(env, "BPF_JMP/JMP32 uses reserved fields\n"); 7236 return -EINVAL; 7237 } 7238 7239 /* check src1 operand */ 7240 err = check_reg_arg(env, insn->src_reg, SRC_OP); 7241 if (err) 7242 return err; 7243 7244 if (is_pointer_value(env, insn->src_reg)) { 7245 verbose(env, "R%d pointer comparison prohibited\n", 7246 insn->src_reg); 7247 return -EACCES; 7248 } 7249 src_reg = ®s[insn->src_reg]; 7250 } else { 7251 if (insn->src_reg != BPF_REG_0) { 7252 verbose(env, "BPF_JMP/JMP32 uses reserved fields\n"); 7253 return -EINVAL; 7254 } 7255 } 7256 7257 /* check src2 operand */ 7258 err = check_reg_arg(env, insn->dst_reg, SRC_OP); 7259 if (err) 7260 return err; 7261 7262 dst_reg = ®s[insn->dst_reg]; 7263 is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32; 7264 7265 if (BPF_SRC(insn->code) == BPF_K) { 7266 pred = is_branch_taken(dst_reg, insn->imm, opcode, is_jmp32); 7267 } else if (src_reg->type == SCALAR_VALUE && 7268 is_jmp32 && tnum_is_const(tnum_subreg(src_reg->var_off))) { 7269 pred = is_branch_taken(dst_reg, 7270 tnum_subreg(src_reg->var_off).value, 7271 opcode, 7272 is_jmp32); 7273 } else if (src_reg->type == SCALAR_VALUE && 7274 !is_jmp32 && tnum_is_const(src_reg->var_off)) { 7275 pred = is_branch_taken(dst_reg, 7276 src_reg->var_off.value, 7277 opcode, 7278 is_jmp32); 7279 } 7280 7281 if (pred >= 0) { 7282 /* If we get here with a dst_reg pointer type it is because 7283 * above is_branch_taken() special cased the 0 comparison. 7284 */ 7285 if (!__is_pointer_value(false, dst_reg)) 7286 err = mark_chain_precision(env, insn->dst_reg); 7287 if (BPF_SRC(insn->code) == BPF_X && !err) 7288 err = mark_chain_precision(env, insn->src_reg); 7289 if (err) 7290 return err; 7291 } 7292 if (pred == 1) { 7293 /* only follow the goto, ignore fall-through */ 7294 *insn_idx += insn->off; 7295 return 0; 7296 } else if (pred == 0) { 7297 /* only follow fall-through branch, since 7298 * that's where the program will go 7299 */ 7300 return 0; 7301 } 7302 7303 other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx, 7304 false); 7305 if (!other_branch) 7306 return -EFAULT; 7307 other_branch_regs = other_branch->frame[other_branch->curframe]->regs; 7308 7309 /* detect if we are comparing against a constant value so we can adjust 7310 * our min/max values for our dst register. 7311 * this is only legit if both are scalars (or pointers to the same 7312 * object, I suppose, but we don't support that right now), because 7313 * otherwise the different base pointers mean the offsets aren't 7314 * comparable. 
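 * Added example (sketch): with an unbounded scalar in r1,
 *   if r1 > 0x7f goto +N
 * lets the fall-through branch assume r1.umax_value == 0x7f and the
 * pushed branch assume r1.umin_value == 0x80; the reg_set_min_max()
 * calls below record exactly this kind of refinement.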
7315 */ 7316 if (BPF_SRC(insn->code) == BPF_X) { 7317 struct bpf_reg_state *src_reg = ®s[insn->src_reg]; 7318 7319 if (dst_reg->type == SCALAR_VALUE && 7320 src_reg->type == SCALAR_VALUE) { 7321 if (tnum_is_const(src_reg->var_off) || 7322 (is_jmp32 && 7323 tnum_is_const(tnum_subreg(src_reg->var_off)))) 7324 reg_set_min_max(&other_branch_regs[insn->dst_reg], 7325 dst_reg, 7326 src_reg->var_off.value, 7327 tnum_subreg(src_reg->var_off).value, 7328 opcode, is_jmp32); 7329 else if (tnum_is_const(dst_reg->var_off) || 7330 (is_jmp32 && 7331 tnum_is_const(tnum_subreg(dst_reg->var_off)))) 7332 reg_set_min_max_inv(&other_branch_regs[insn->src_reg], 7333 src_reg, 7334 dst_reg->var_off.value, 7335 tnum_subreg(dst_reg->var_off).value, 7336 opcode, is_jmp32); 7337 else if (!is_jmp32 && 7338 (opcode == BPF_JEQ || opcode == BPF_JNE)) 7339 /* Comparing for equality, we can combine knowledge */ 7340 reg_combine_min_max(&other_branch_regs[insn->src_reg], 7341 &other_branch_regs[insn->dst_reg], 7342 src_reg, dst_reg, opcode); 7343 } 7344 } else if (dst_reg->type == SCALAR_VALUE) { 7345 reg_set_min_max(&other_branch_regs[insn->dst_reg], 7346 dst_reg, insn->imm, (u32)insn->imm, 7347 opcode, is_jmp32); 7348 } 7349 7350 /* detect if R == 0 where R is returned from bpf_map_lookup_elem(). 7351 * NOTE: these optimizations below are related with pointer comparison 7352 * which will never be JMP32. 7353 */ 7354 if (!is_jmp32 && BPF_SRC(insn->code) == BPF_K && 7355 insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) && 7356 reg_type_may_be_null(dst_reg->type)) { 7357 /* Mark all identical registers in each branch as either 7358 * safe or unknown depending R == 0 or R != 0 conditional. 7359 */ 7360 mark_ptr_or_null_regs(this_branch, insn->dst_reg, 7361 opcode == BPF_JNE); 7362 mark_ptr_or_null_regs(other_branch, insn->dst_reg, 7363 opcode == BPF_JEQ); 7364 } else if (!try_match_pkt_pointers(insn, dst_reg, ®s[insn->src_reg], 7365 this_branch, other_branch) && 7366 is_pointer_value(env, insn->dst_reg)) { 7367 verbose(env, "R%d pointer comparison prohibited\n", 7368 insn->dst_reg); 7369 return -EACCES; 7370 } 7371 if (env->log.level & BPF_LOG_LEVEL) 7372 print_verifier_state(env, this_branch->frame[this_branch->curframe]); 7373 return 0; 7374 } 7375 7376 /* verify BPF_LD_IMM64 instruction */ 7377 static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn) 7378 { 7379 struct bpf_insn_aux_data *aux = cur_aux(env); 7380 struct bpf_reg_state *regs = cur_regs(env); 7381 struct bpf_map *map; 7382 int err; 7383 7384 if (BPF_SIZE(insn->code) != BPF_DW) { 7385 verbose(env, "invalid BPF_LD_IMM insn\n"); 7386 return -EINVAL; 7387 } 7388 if (insn->off != 0) { 7389 verbose(env, "BPF_LD_IMM64 uses reserved fields\n"); 7390 return -EINVAL; 7391 } 7392 7393 err = check_reg_arg(env, insn->dst_reg, DST_OP); 7394 if (err) 7395 return err; 7396 7397 if (insn->src_reg == 0) { 7398 u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm; 7399 7400 regs[insn->dst_reg].type = SCALAR_VALUE; 7401 __mark_reg_known(®s[insn->dst_reg], imm); 7402 return 0; 7403 } 7404 7405 map = env->used_maps[aux->map_index]; 7406 mark_reg_known_zero(env, regs, insn->dst_reg); 7407 regs[insn->dst_reg].map_ptr = map; 7408 7409 if (insn->src_reg == BPF_PSEUDO_MAP_VALUE) { 7410 regs[insn->dst_reg].type = PTR_TO_MAP_VALUE; 7411 regs[insn->dst_reg].off = aux->map_off; 7412 if (map_value_has_spin_lock(map)) 7413 regs[insn->dst_reg].id = ++env->id_gen; 7414 } else if (insn->src_reg == BPF_PSEUDO_MAP_FD) { 7415 regs[insn->dst_reg].type = 
CONST_PTR_TO_MAP; 7416 } else { 7417 verbose(env, "bpf verifier is misconfigured\n"); 7418 return -EINVAL; 7419 } 7420 7421 return 0; 7422 } 7423 7424 static bool may_access_skb(enum bpf_prog_type type) 7425 { 7426 switch (type) { 7427 case BPF_PROG_TYPE_SOCKET_FILTER: 7428 case BPF_PROG_TYPE_SCHED_CLS: 7429 case BPF_PROG_TYPE_SCHED_ACT: 7430 return true; 7431 default: 7432 return false; 7433 } 7434 } 7435 7436 /* verify safety of LD_ABS|LD_IND instructions: 7437 * - they can only appear in the programs where ctx == skb 7438 * - since they are wrappers of function calls, they scratch R1-R5 registers, 7439 * preserve R6-R9, and store return value into R0 7440 * 7441 * Implicit input: 7442 * ctx == skb == R6 == CTX 7443 * 7444 * Explicit input: 7445 * SRC == any register 7446 * IMM == 32-bit immediate 7447 * 7448 * Output: 7449 * R0 - 8/16/32-bit skb data converted to cpu endianness 7450 */ 7451 static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn) 7452 { 7453 struct bpf_reg_state *regs = cur_regs(env); 7454 static const int ctx_reg = BPF_REG_6; 7455 u8 mode = BPF_MODE(insn->code); 7456 int i, err; 7457 7458 if (!may_access_skb(resolve_prog_type(env->prog))) { 7459 verbose(env, "BPF_LD_[ABS|IND] instructions not allowed for this program type\n"); 7460 return -EINVAL; 7461 } 7462 7463 if (!env->ops->gen_ld_abs) { 7464 verbose(env, "bpf verifier is misconfigured\n"); 7465 return -EINVAL; 7466 } 7467 7468 if (env->subprog_cnt > 1) { 7469 /* when program has LD_ABS insn JITs and interpreter assume 7470 * that r1 == ctx == skb which is not the case for callees 7471 * that can have arbitrary arguments. It's problematic 7472 * for main prog as well since JITs would need to analyze 7473 * all functions in order to make proper register save/restore 7474 * decisions in the main prog. Hence disallow LD_ABS with calls 7475 */ 7476 verbose(env, "BPF_LD_[ABS|IND] instructions cannot be mixed with bpf-to-bpf calls\n"); 7477 return -EINVAL; 7478 } 7479 7480 if (insn->dst_reg != BPF_REG_0 || insn->off != 0 || 7481 BPF_SIZE(insn->code) == BPF_DW || 7482 (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) { 7483 verbose(env, "BPF_LD_[ABS|IND] uses reserved fields\n"); 7484 return -EINVAL; 7485 } 7486 7487 /* check whether implicit source operand (register R6) is readable */ 7488 err = check_reg_arg(env, ctx_reg, SRC_OP); 7489 if (err) 7490 return err; 7491 7492 /* Disallow usage of BPF_LD_[ABS|IND] with reference tracking, as 7493 * gen_ld_abs() may terminate the program at runtime, leading to 7494 * reference leak. 
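 * Added illustration (hypothetical program): something like
 *   sk = bpf_sk_lookup_tcp(...)    // acquires a reference
 *   r0 = *(u8 *)skb[0]             // LD_ABS may terminate the prog here
 *   bpf_sk_release(sk)
 * could exit between lookup and release and leak the reference, which
 * is why check_reference_leak() is called below.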
7495 */ 7496 err = check_reference_leak(env); 7497 if (err) { 7498 verbose(env, "BPF_LD_[ABS|IND] cannot be mixed with socket references\n"); 7499 return err; 7500 } 7501 7502 if (env->cur_state->active_spin_lock) { 7503 verbose(env, "BPF_LD_[ABS|IND] cannot be used inside bpf_spin_lock-ed region\n"); 7504 return -EINVAL; 7505 } 7506 7507 if (regs[ctx_reg].type != PTR_TO_CTX) { 7508 verbose(env, 7509 "at the time of BPF_LD_ABS|IND R6 != pointer to skb\n"); 7510 return -EINVAL; 7511 } 7512 7513 if (mode == BPF_IND) { 7514 /* check explicit source operand */ 7515 err = check_reg_arg(env, insn->src_reg, SRC_OP); 7516 if (err) 7517 return err; 7518 } 7519 7520 err = check_ctx_reg(env, ®s[ctx_reg], ctx_reg); 7521 if (err < 0) 7522 return err; 7523 7524 /* reset caller saved regs to unreadable */ 7525 for (i = 0; i < CALLER_SAVED_REGS; i++) { 7526 mark_reg_not_init(env, regs, caller_saved[i]); 7527 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK); 7528 } 7529 7530 /* mark destination R0 register as readable, since it contains 7531 * the value fetched from the packet. 7532 * Already marked as written above. 7533 */ 7534 mark_reg_unknown(env, regs, BPF_REG_0); 7535 /* ld_abs load up to 32-bit skb data. */ 7536 regs[BPF_REG_0].subreg_def = env->insn_idx + 1; 7537 return 0; 7538 } 7539 7540 static int check_return_code(struct bpf_verifier_env *env) 7541 { 7542 struct tnum enforce_attach_type_range = tnum_unknown; 7543 const struct bpf_prog *prog = env->prog; 7544 struct bpf_reg_state *reg; 7545 struct tnum range = tnum_range(0, 1); 7546 enum bpf_prog_type prog_type = resolve_prog_type(env->prog); 7547 int err; 7548 7549 /* LSM and struct_ops func-ptr's return type could be "void" */ 7550 if ((prog_type == BPF_PROG_TYPE_STRUCT_OPS || 7551 prog_type == BPF_PROG_TYPE_LSM) && 7552 !prog->aux->attach_func_proto->type) 7553 return 0; 7554 7555 /* eBPF calling convetion is such that R0 is used 7556 * to return the value from eBPF program. 
7557 * Make sure that it's readable at this time 7558 * of bpf_exit, which means that program wrote 7559 * something into it earlier 7560 */ 7561 err = check_reg_arg(env, BPF_REG_0, SRC_OP); 7562 if (err) 7563 return err; 7564 7565 if (is_pointer_value(env, BPF_REG_0)) { 7566 verbose(env, "R0 leaks addr as return value\n"); 7567 return -EACCES; 7568 } 7569 7570 switch (prog_type) { 7571 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 7572 if (env->prog->expected_attach_type == BPF_CGROUP_UDP4_RECVMSG || 7573 env->prog->expected_attach_type == BPF_CGROUP_UDP6_RECVMSG || 7574 env->prog->expected_attach_type == BPF_CGROUP_INET4_GETPEERNAME || 7575 env->prog->expected_attach_type == BPF_CGROUP_INET6_GETPEERNAME || 7576 env->prog->expected_attach_type == BPF_CGROUP_INET4_GETSOCKNAME || 7577 env->prog->expected_attach_type == BPF_CGROUP_INET6_GETSOCKNAME) 7578 range = tnum_range(1, 1); 7579 break; 7580 case BPF_PROG_TYPE_CGROUP_SKB: 7581 if (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS) { 7582 range = tnum_range(0, 3); 7583 enforce_attach_type_range = tnum_range(2, 3); 7584 } 7585 break; 7586 case BPF_PROG_TYPE_CGROUP_SOCK: 7587 case BPF_PROG_TYPE_SOCK_OPS: 7588 case BPF_PROG_TYPE_CGROUP_DEVICE: 7589 case BPF_PROG_TYPE_CGROUP_SYSCTL: 7590 case BPF_PROG_TYPE_CGROUP_SOCKOPT: 7591 break; 7592 case BPF_PROG_TYPE_RAW_TRACEPOINT: 7593 if (!env->prog->aux->attach_btf_id) 7594 return 0; 7595 range = tnum_const(0); 7596 break; 7597 case BPF_PROG_TYPE_TRACING: 7598 switch (env->prog->expected_attach_type) { 7599 case BPF_TRACE_FENTRY: 7600 case BPF_TRACE_FEXIT: 7601 range = tnum_const(0); 7602 break; 7603 case BPF_TRACE_RAW_TP: 7604 case BPF_MODIFY_RETURN: 7605 return 0; 7606 case BPF_TRACE_ITER: 7607 break; 7608 default: 7609 return -ENOTSUPP; 7610 } 7611 break; 7612 case BPF_PROG_TYPE_SK_LOOKUP: 7613 range = tnum_range(SK_DROP, SK_PASS); 7614 break; 7615 case BPF_PROG_TYPE_EXT: 7616 /* freplace program can return anything as its return value 7617 * depends on the to-be-replaced kernel func or bpf program. 
7618 */ 7619 default: 7620 return 0; 7621 } 7622 7623 reg = cur_regs(env) + BPF_REG_0; 7624 if (reg->type != SCALAR_VALUE) { 7625 verbose(env, "At program exit the register R0 is not a known value (%s)\n", 7626 reg_type_str[reg->type]); 7627 return -EINVAL; 7628 } 7629 7630 if (!tnum_in(range, reg->var_off)) { 7631 char tn_buf[48]; 7632 7633 verbose(env, "At program exit the register R0 "); 7634 if (!tnum_is_unknown(reg->var_off)) { 7635 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); 7636 verbose(env, "has value %s", tn_buf); 7637 } else { 7638 verbose(env, "has unknown scalar value"); 7639 } 7640 tnum_strn(tn_buf, sizeof(tn_buf), range); 7641 verbose(env, " should have been in %s\n", tn_buf); 7642 return -EINVAL; 7643 } 7644 7645 if (!tnum_is_unknown(enforce_attach_type_range) && 7646 tnum_in(enforce_attach_type_range, reg->var_off)) 7647 env->prog->enforce_expected_attach_type = 1; 7648 return 0; 7649 } 7650 7651 /* non-recursive DFS pseudo code 7652 * 1 procedure DFS-iterative(G,v): 7653 * 2 label v as discovered 7654 * 3 let S be a stack 7655 * 4 S.push(v) 7656 * 5 while S is not empty 7657 * 6 t <- S.pop() 7658 * 7 if t is what we're looking for: 7659 * 8 return t 7660 * 9 for all edges e in G.adjacentEdges(t) do 7661 * 10 if edge e is already labelled 7662 * 11 continue with the next edge 7663 * 12 w <- G.adjacentVertex(t,e) 7664 * 13 if vertex w is not discovered and not explored 7665 * 14 label e as tree-edge 7666 * 15 label w as discovered 7667 * 16 S.push(w) 7668 * 17 continue at 5 7669 * 18 else if vertex w is discovered 7670 * 19 label e as back-edge 7671 * 20 else 7672 * 21 // vertex w is explored 7673 * 22 label e as forward- or cross-edge 7674 * 23 label t as explored 7675 * 24 S.pop() 7676 * 7677 * convention: 7678 * 0x10 - discovered 7679 * 0x11 - discovered and fall-through edge labelled 7680 * 0x12 - discovered and fall-through and branch edges labelled 7681 * 0x20 - explored 7682 */ 7683 7684 enum { 7685 DISCOVERED = 0x10, 7686 EXPLORED = 0x20, 7687 FALLTHROUGH = 1, 7688 BRANCH = 2, 7689 }; 7690 7691 static u32 state_htab_size(struct bpf_verifier_env *env) 7692 { 7693 return env->prog->len; 7694 } 7695 7696 static struct bpf_verifier_state_list **explored_state( 7697 struct bpf_verifier_env *env, 7698 int idx) 7699 { 7700 struct bpf_verifier_state *cur = env->cur_state; 7701 struct bpf_func_state *state = cur->frame[cur->curframe]; 7702 7703 return &env->explored_states[(idx ^ state->callsite) % state_htab_size(env)]; 7704 } 7705 7706 static void init_explored_state(struct bpf_verifier_env *env, int idx) 7707 { 7708 env->insn_aux_data[idx].prune_point = true; 7709 } 7710 7711 /* t, w, e - match pseudo-code above: 7712 * t - index of current instruction 7713 * w - next instruction 7714 * e - edge 7715 */ 7716 static int push_insn(int t, int w, int e, struct bpf_verifier_env *env, 7717 bool loop_ok) 7718 { 7719 int *insn_stack = env->cfg.insn_stack; 7720 int *insn_state = env->cfg.insn_state; 7721 7722 if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH)) 7723 return 0; 7724 7725 if (e == BRANCH && insn_state[t] >= (DISCOVERED | BRANCH)) 7726 return 0; 7727 7728 if (w < 0 || w >= env->prog->len) { 7729 verbose_linfo(env, t, "%d: ", t); 7730 verbose(env, "jump out of range from insn %d to %d\n", t, w); 7731 return -EINVAL; 7732 } 7733 7734 if (e == BRANCH) 7735 /* mark branch target for state pruning */ 7736 init_explored_state(env, w); 7737 7738 if (insn_state[w] == 0) { 7739 /* tree-edge */ 7740 insn_state[t] = DISCOVERED | e; 7741 insn_state[w] = 
DISCOVERED; 7742 if (env->cfg.cur_stack >= env->prog->len) 7743 return -E2BIG; 7744 insn_stack[env->cfg.cur_stack++] = w; 7745 return 1; 7746 } else if ((insn_state[w] & 0xF0) == DISCOVERED) { 7747 if (loop_ok && env->bpf_capable) 7748 return 0; 7749 verbose_linfo(env, t, "%d: ", t); 7750 verbose_linfo(env, w, "%d: ", w); 7751 verbose(env, "back-edge from insn %d to %d\n", t, w); 7752 return -EINVAL; 7753 } else if (insn_state[w] == EXPLORED) { 7754 /* forward- or cross-edge */ 7755 insn_state[t] = DISCOVERED | e; 7756 } else { 7757 verbose(env, "insn state internal bug\n"); 7758 return -EFAULT; 7759 } 7760 return 0; 7761 } 7762 7763 /* non-recursive depth-first-search to detect loops in BPF program 7764 * loop == back-edge in directed graph 7765 */ 7766 static int check_cfg(struct bpf_verifier_env *env) 7767 { 7768 struct bpf_insn *insns = env->prog->insnsi; 7769 int insn_cnt = env->prog->len; 7770 int *insn_stack, *insn_state; 7771 int ret = 0; 7772 int i, t; 7773 7774 insn_state = env->cfg.insn_state = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL); 7775 if (!insn_state) 7776 return -ENOMEM; 7777 7778 insn_stack = env->cfg.insn_stack = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL); 7779 if (!insn_stack) { 7780 kvfree(insn_state); 7781 return -ENOMEM; 7782 } 7783 7784 insn_state[0] = DISCOVERED; /* mark 1st insn as discovered */ 7785 insn_stack[0] = 0; /* 0 is the first instruction */ 7786 env->cfg.cur_stack = 1; 7787 7788 peek_stack: 7789 if (env->cfg.cur_stack == 0) 7790 goto check_state; 7791 t = insn_stack[env->cfg.cur_stack - 1]; 7792 7793 if (BPF_CLASS(insns[t].code) == BPF_JMP || 7794 BPF_CLASS(insns[t].code) == BPF_JMP32) { 7795 u8 opcode = BPF_OP(insns[t].code); 7796 7797 if (opcode == BPF_EXIT) { 7798 goto mark_explored; 7799 } else if (opcode == BPF_CALL) { 7800 ret = push_insn(t, t + 1, FALLTHROUGH, env, false); 7801 if (ret == 1) 7802 goto peek_stack; 7803 else if (ret < 0) 7804 goto err_free; 7805 if (t + 1 < insn_cnt) 7806 init_explored_state(env, t + 1); 7807 if (insns[t].src_reg == BPF_PSEUDO_CALL) { 7808 init_explored_state(env, t); 7809 ret = push_insn(t, t + insns[t].imm + 1, BRANCH, 7810 env, false); 7811 if (ret == 1) 7812 goto peek_stack; 7813 else if (ret < 0) 7814 goto err_free; 7815 } 7816 } else if (opcode == BPF_JA) { 7817 if (BPF_SRC(insns[t].code) != BPF_K) { 7818 ret = -EINVAL; 7819 goto err_free; 7820 } 7821 /* unconditional jump with single edge */ 7822 ret = push_insn(t, t + insns[t].off + 1, 7823 FALLTHROUGH, env, true); 7824 if (ret == 1) 7825 goto peek_stack; 7826 else if (ret < 0) 7827 goto err_free; 7828 /* unconditional jmp is not a good pruning point, 7829 * but it's marked, since backtracking needs 7830 * to record jmp history in is_state_visited(). 
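 * Added note (illustrative): when mark_chain_precision() later walks
 * instructions backwards, the recorded jump history is what lets it
 * step from the target of this 'goto' back to the 'goto' itself rather
 * than to the textually preceding instruction.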
7831 */ 7832 init_explored_state(env, t + insns[t].off + 1); 7833 /* tell verifier to check for equivalent states 7834 * after every call and jump 7835 */ 7836 if (t + 1 < insn_cnt) 7837 init_explored_state(env, t + 1); 7838 } else { 7839 /* conditional jump with two edges */ 7840 init_explored_state(env, t); 7841 ret = push_insn(t, t + 1, FALLTHROUGH, env, true); 7842 if (ret == 1) 7843 goto peek_stack; 7844 else if (ret < 0) 7845 goto err_free; 7846 7847 ret = push_insn(t, t + insns[t].off + 1, BRANCH, env, true); 7848 if (ret == 1) 7849 goto peek_stack; 7850 else if (ret < 0) 7851 goto err_free; 7852 } 7853 } else { 7854 /* all other non-branch instructions with single 7855 * fall-through edge 7856 */ 7857 ret = push_insn(t, t + 1, FALLTHROUGH, env, false); 7858 if (ret == 1) 7859 goto peek_stack; 7860 else if (ret < 0) 7861 goto err_free; 7862 } 7863 7864 mark_explored: 7865 insn_state[t] = EXPLORED; 7866 if (env->cfg.cur_stack-- <= 0) { 7867 verbose(env, "pop stack internal bug\n"); 7868 ret = -EFAULT; 7869 goto err_free; 7870 } 7871 goto peek_stack; 7872 7873 check_state: 7874 for (i = 0; i < insn_cnt; i++) { 7875 if (insn_state[i] != EXPLORED) { 7876 verbose(env, "unreachable insn %d\n", i); 7877 ret = -EINVAL; 7878 goto err_free; 7879 } 7880 } 7881 ret = 0; /* cfg looks good */ 7882 7883 err_free: 7884 kvfree(insn_state); 7885 kvfree(insn_stack); 7886 env->cfg.insn_state = env->cfg.insn_stack = NULL; 7887 return ret; 7888 } 7889 7890 /* The minimum supported BTF func info size */ 7891 #define MIN_BPF_FUNCINFO_SIZE 8 7892 #define MAX_FUNCINFO_REC_SIZE 252 7893 7894 static int check_btf_func(struct bpf_verifier_env *env, 7895 const union bpf_attr *attr, 7896 union bpf_attr __user *uattr) 7897 { 7898 u32 i, nfuncs, urec_size, min_size; 7899 u32 krec_size = sizeof(struct bpf_func_info); 7900 struct bpf_func_info *krecord; 7901 struct bpf_func_info_aux *info_aux = NULL; 7902 const struct btf_type *type; 7903 struct bpf_prog *prog; 7904 const struct btf *btf; 7905 void __user *urecord; 7906 u32 prev_offset = 0; 7907 int ret = -ENOMEM; 7908 7909 nfuncs = attr->func_info_cnt; 7910 if (!nfuncs) 7911 return 0; 7912 7913 if (nfuncs != env->subprog_cnt) { 7914 verbose(env, "number of funcs in func_info doesn't match number of subprogs\n"); 7915 return -EINVAL; 7916 } 7917 7918 urec_size = attr->func_info_rec_size; 7919 if (urec_size < MIN_BPF_FUNCINFO_SIZE || 7920 urec_size > MAX_FUNCINFO_REC_SIZE || 7921 urec_size % sizeof(u32)) { 7922 verbose(env, "invalid func info rec size %u\n", urec_size); 7923 return -EINVAL; 7924 } 7925 7926 prog = env->prog; 7927 btf = prog->aux->btf; 7928 7929 urecord = u64_to_user_ptr(attr->func_info); 7930 min_size = min_t(u32, krec_size, urec_size); 7931 7932 krecord = kvcalloc(nfuncs, krec_size, GFP_KERNEL | __GFP_NOWARN); 7933 if (!krecord) 7934 return -ENOMEM; 7935 info_aux = kcalloc(nfuncs, sizeof(*info_aux), GFP_KERNEL | __GFP_NOWARN); 7936 if (!info_aux) 7937 goto err_free; 7938 7939 for (i = 0; i < nfuncs; i++) { 7940 ret = bpf_check_uarg_tail_zero(urecord, krec_size, urec_size); 7941 if (ret) { 7942 if (ret == -E2BIG) { 7943 verbose(env, "nonzero tailing record in func info"); 7944 /* set the size kernel expects so loader can zero 7945 * out the rest of the record. 
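 * Added example (sizes are illustrative): if the kernel's
 * struct bpf_func_info is 8 bytes but userspace passed
 * func_info_rec_size == 16 with non-zero bytes in the tail,
 * bpf_check_uarg_tail_zero() returns -E2BIG and min_size (8) is
 * reported back so the loader can zero the trailing bytes and retry.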
7946 */ 7947 if (put_user(min_size, &uattr->func_info_rec_size)) 7948 ret = -EFAULT; 7949 } 7950 goto err_free; 7951 } 7952 7953 if (copy_from_user(&krecord[i], urecord, min_size)) { 7954 ret = -EFAULT; 7955 goto err_free; 7956 } 7957 7958 /* check insn_off */ 7959 if (i == 0) { 7960 if (krecord[i].insn_off) { 7961 verbose(env, 7962 "nonzero insn_off %u for the first func info record", 7963 krecord[i].insn_off); 7964 ret = -EINVAL; 7965 goto err_free; 7966 } 7967 } else if (krecord[i].insn_off <= prev_offset) { 7968 verbose(env, 7969 "same or smaller insn offset (%u) than previous func info record (%u)", 7970 krecord[i].insn_off, prev_offset); 7971 ret = -EINVAL; 7972 goto err_free; 7973 } 7974 7975 if (env->subprog_info[i].start != krecord[i].insn_off) { 7976 verbose(env, "func_info BTF section doesn't match subprog layout in BPF program\n"); 7977 ret = -EINVAL; 7978 goto err_free; 7979 } 7980 7981 /* check type_id */ 7982 type = btf_type_by_id(btf, krecord[i].type_id); 7983 if (!type || !btf_type_is_func(type)) { 7984 verbose(env, "invalid type id %d in func info", 7985 krecord[i].type_id); 7986 ret = -EINVAL; 7987 goto err_free; 7988 } 7989 info_aux[i].linkage = BTF_INFO_VLEN(type->info); 7990 prev_offset = krecord[i].insn_off; 7991 urecord += urec_size; 7992 } 7993 7994 prog->aux->func_info = krecord; 7995 prog->aux->func_info_cnt = nfuncs; 7996 prog->aux->func_info_aux = info_aux; 7997 return 0; 7998 7999 err_free: 8000 kvfree(krecord); 8001 kfree(info_aux); 8002 return ret; 8003 } 8004 8005 static void adjust_btf_func(struct bpf_verifier_env *env) 8006 { 8007 struct bpf_prog_aux *aux = env->prog->aux; 8008 int i; 8009 8010 if (!aux->func_info) 8011 return; 8012 8013 for (i = 0; i < env->subprog_cnt; i++) 8014 aux->func_info[i].insn_off = env->subprog_info[i].start; 8015 } 8016 8017 #define MIN_BPF_LINEINFO_SIZE (offsetof(struct bpf_line_info, line_col) + \ 8018 sizeof(((struct bpf_line_info *)(0))->line_col)) 8019 #define MAX_LINEINFO_REC_SIZE MAX_FUNCINFO_REC_SIZE 8020 8021 static int check_btf_line(struct bpf_verifier_env *env, 8022 const union bpf_attr *attr, 8023 union bpf_attr __user *uattr) 8024 { 8025 u32 i, s, nr_linfo, ncopy, expected_size, rec_size, prev_offset = 0; 8026 struct bpf_subprog_info *sub; 8027 struct bpf_line_info *linfo; 8028 struct bpf_prog *prog; 8029 const struct btf *btf; 8030 void __user *ulinfo; 8031 int err; 8032 8033 nr_linfo = attr->line_info_cnt; 8034 if (!nr_linfo) 8035 return 0; 8036 8037 rec_size = attr->line_info_rec_size; 8038 if (rec_size < MIN_BPF_LINEINFO_SIZE || 8039 rec_size > MAX_LINEINFO_REC_SIZE || 8040 rec_size & (sizeof(u32) - 1)) 8041 return -EINVAL; 8042 8043 /* Need to zero it in case the userspace may 8044 * pass in a smaller bpf_line_info object. 
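 * Added note (hypothetical future case): should struct bpf_line_info
 * grow and an old loader pass the smaller record, the new trailing
 * fields of each copied record stay zero thanks to kvcalloc() instead
 * of holding stale memory.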
8045 */ 8046 linfo = kvcalloc(nr_linfo, sizeof(struct bpf_line_info), 8047 GFP_KERNEL | __GFP_NOWARN); 8048 if (!linfo) 8049 return -ENOMEM; 8050 8051 prog = env->prog; 8052 btf = prog->aux->btf; 8053 8054 s = 0; 8055 sub = env->subprog_info; 8056 ulinfo = u64_to_user_ptr(attr->line_info); 8057 expected_size = sizeof(struct bpf_line_info); 8058 ncopy = min_t(u32, expected_size, rec_size); 8059 for (i = 0; i < nr_linfo; i++) { 8060 err = bpf_check_uarg_tail_zero(ulinfo, expected_size, rec_size); 8061 if (err) { 8062 if (err == -E2BIG) { 8063 verbose(env, "nonzero tailing record in line_info"); 8064 if (put_user(expected_size, 8065 &uattr->line_info_rec_size)) 8066 err = -EFAULT; 8067 } 8068 goto err_free; 8069 } 8070 8071 if (copy_from_user(&linfo[i], ulinfo, ncopy)) { 8072 err = -EFAULT; 8073 goto err_free; 8074 } 8075 8076 /* 8077 * Check insn_off to ensure 8078 * 1) strictly increasing AND 8079 * 2) bounded by prog->len 8080 * 8081 * The linfo[0].insn_off == 0 check logically falls into 8082 * the later "missing bpf_line_info for func..." case 8083 * because the first linfo[0].insn_off must be the 8084 * first sub also and the first sub must have 8085 * subprog_info[0].start == 0. 8086 */ 8087 if ((i && linfo[i].insn_off <= prev_offset) || 8088 linfo[i].insn_off >= prog->len) { 8089 verbose(env, "Invalid line_info[%u].insn_off:%u (prev_offset:%u prog->len:%u)\n", 8090 i, linfo[i].insn_off, prev_offset, 8091 prog->len); 8092 err = -EINVAL; 8093 goto err_free; 8094 } 8095 8096 if (!prog->insnsi[linfo[i].insn_off].code) { 8097 verbose(env, 8098 "Invalid insn code at line_info[%u].insn_off\n", 8099 i); 8100 err = -EINVAL; 8101 goto err_free; 8102 } 8103 8104 if (!btf_name_by_offset(btf, linfo[i].line_off) || 8105 !btf_name_by_offset(btf, linfo[i].file_name_off)) { 8106 verbose(env, "Invalid line_info[%u].line_off or .file_name_off\n", i); 8107 err = -EINVAL; 8108 goto err_free; 8109 } 8110 8111 if (s != env->subprog_cnt) { 8112 if (linfo[i].insn_off == sub[s].start) { 8113 sub[s].linfo_idx = i; 8114 s++; 8115 } else if (sub[s].start < linfo[i].insn_off) { 8116 verbose(env, "missing bpf_line_info for func#%u\n", s); 8117 err = -EINVAL; 8118 goto err_free; 8119 } 8120 } 8121 8122 prev_offset = linfo[i].insn_off; 8123 ulinfo += rec_size; 8124 } 8125 8126 if (s != env->subprog_cnt) { 8127 verbose(env, "missing bpf_line_info for %u funcs starting from func#%u\n", 8128 env->subprog_cnt - s, s); 8129 err = -EINVAL; 8130 goto err_free; 8131 } 8132 8133 prog->aux->linfo = linfo; 8134 prog->aux->nr_linfo = nr_linfo; 8135 8136 return 0; 8137 8138 err_free: 8139 kvfree(linfo); 8140 return err; 8141 } 8142 8143 static int check_btf_info(struct bpf_verifier_env *env, 8144 const union bpf_attr *attr, 8145 union bpf_attr __user *uattr) 8146 { 8147 struct btf *btf; 8148 int err; 8149 8150 if (!attr->func_info_cnt && !attr->line_info_cnt) 8151 return 0; 8152 8153 btf = btf_get_by_fd(attr->prog_btf_fd); 8154 if (IS_ERR(btf)) 8155 return PTR_ERR(btf); 8156 env->prog->aux->btf = btf; 8157 8158 err = check_btf_func(env, attr, uattr); 8159 if (err) 8160 return err; 8161 8162 err = check_btf_line(env, attr, uattr); 8163 if (err) 8164 return err; 8165 8166 return 0; 8167 } 8168 8169 /* check %cur's range satisfies %old's */ 8170 static bool range_within(struct bpf_reg_state *old, 8171 struct bpf_reg_state *cur) 8172 { 8173 return old->umin_value <= cur->umin_value && 8174 old->umax_value >= cur->umax_value && 8175 old->smin_value <= cur->smin_value && 8176 old->smax_value >= cur->smax_value; 8177 } 8178 8179 /* 
Maximum number of register states that can exist at once */ 8180 #define ID_MAP_SIZE (MAX_BPF_REG + MAX_BPF_STACK / BPF_REG_SIZE) 8181 struct idpair { 8182 u32 old; 8183 u32 cur; 8184 }; 8185 8186 /* If in the old state two registers had the same id, then they need to have 8187 * the same id in the new state as well. But that id could be different from 8188 * the old state, so we need to track the mapping from old to new ids. 8189 * Once we have seen that, say, a reg with old id 5 had new id 9, any subsequent 8190 * regs with old id 5 must also have new id 9 for the new state to be safe. But 8191 * regs with a different old id could still have new id 9, we don't care about 8192 * that. 8193 * So we look through our idmap to see if this old id has been seen before. If 8194 * so, we require the new id to match; otherwise, we add the id pair to the map. 8195 */ 8196 static bool check_ids(u32 old_id, u32 cur_id, struct idpair *idmap) 8197 { 8198 unsigned int i; 8199 8200 for (i = 0; i < ID_MAP_SIZE; i++) { 8201 if (!idmap[i].old) { 8202 /* Reached an empty slot; haven't seen this id before */ 8203 idmap[i].old = old_id; 8204 idmap[i].cur = cur_id; 8205 return true; 8206 } 8207 if (idmap[i].old == old_id) 8208 return idmap[i].cur == cur_id; 8209 } 8210 /* We ran out of idmap slots, which should be impossible */ 8211 WARN_ON_ONCE(1); 8212 return false; 8213 } 8214 8215 static void clean_func_state(struct bpf_verifier_env *env, 8216 struct bpf_func_state *st) 8217 { 8218 enum bpf_reg_liveness live; 8219 int i, j; 8220 8221 for (i = 0; i < BPF_REG_FP; i++) { 8222 live = st->regs[i].live; 8223 /* liveness must not touch this register anymore */ 8224 st->regs[i].live |= REG_LIVE_DONE; 8225 if (!(live & REG_LIVE_READ)) 8226 /* since the register is unused, clear its state 8227 * to make further comparison simpler 8228 */ 8229 __mark_reg_not_init(env, &st->regs[i]); 8230 } 8231 8232 for (i = 0; i < st->allocated_stack / BPF_REG_SIZE; i++) { 8233 live = st->stack[i].spilled_ptr.live; 8234 /* liveness must not touch this stack slot anymore */ 8235 st->stack[i].spilled_ptr.live |= REG_LIVE_DONE; 8236 if (!(live & REG_LIVE_READ)) { 8237 __mark_reg_not_init(env, &st->stack[i].spilled_ptr); 8238 for (j = 0; j < BPF_REG_SIZE; j++) 8239 st->stack[i].slot_type[j] = STACK_INVALID; 8240 } 8241 } 8242 } 8243 8244 static void clean_verifier_state(struct bpf_verifier_env *env, 8245 struct bpf_verifier_state *st) 8246 { 8247 int i; 8248 8249 if (st->frame[0]->regs[0].live & REG_LIVE_DONE) 8250 /* all regs in this state in all frames were already marked */ 8251 return; 8252 8253 for (i = 0; i <= st->curframe; i++) 8254 clean_func_state(env, st->frame[i]); 8255 } 8256 8257 /* the parentage chains form a tree. 8258 * the verifier states are added to state lists at given insn and 8259 * pushed into state stack for future exploration. 8260 * when the verifier reaches bpf_exit insn some of the verifer states 8261 * stored in the state lists have their final liveness state already, 8262 * but a lot of states will get revised from liveness point of view when 8263 * the verifier explores other branches. 8264 * Example: 8265 * 1: r0 = 1 8266 * 2: if r1 == 100 goto pc+1 8267 * 3: r0 = 2 8268 * 4: exit 8269 * when the verifier reaches exit insn the register r0 in the state list of 8270 * insn 2 will be seen as !REG_LIVE_READ. Then the verifier pops the other_branch 8271 * of insn 2 and goes exploring further. At the insn 4 it will walk the 8272 * parentage chain from insn 4 into insn 2 and will mark r0 as REG_LIVE_READ. 
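 * (Added continuation of the example: that REG_LIVE_READ mark on r0 is
 * what makes regsafe() actually compare r0 when a later state is
 * checked against the one stored at insn 2; registers without a read
 * mark are skipped entirely during state comparison.)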
8273 * 8274 * Since the verifier pushes the branch states as it sees them while exploring 8275 * the program the condition of walking the branch instruction for the second 8276 * time means that all states below this branch were already explored and 8277 * their final liveness markes are already propagated. 8278 * Hence when the verifier completes the search of state list in is_state_visited() 8279 * we can call this clean_live_states() function to mark all liveness states 8280 * as REG_LIVE_DONE to indicate that 'parent' pointers of 'struct bpf_reg_state' 8281 * will not be used. 8282 * This function also clears the registers and stack for states that !READ 8283 * to simplify state merging. 8284 * 8285 * Important note here that walking the same branch instruction in the callee 8286 * doesn't meant that the states are DONE. The verifier has to compare 8287 * the callsites 8288 */ 8289 static void clean_live_states(struct bpf_verifier_env *env, int insn, 8290 struct bpf_verifier_state *cur) 8291 { 8292 struct bpf_verifier_state_list *sl; 8293 int i; 8294 8295 sl = *explored_state(env, insn); 8296 while (sl) { 8297 if (sl->state.branches) 8298 goto next; 8299 if (sl->state.insn_idx != insn || 8300 sl->state.curframe != cur->curframe) 8301 goto next; 8302 for (i = 0; i <= cur->curframe; i++) 8303 if (sl->state.frame[i]->callsite != cur->frame[i]->callsite) 8304 goto next; 8305 clean_verifier_state(env, &sl->state); 8306 next: 8307 sl = sl->next; 8308 } 8309 } 8310 8311 /* Returns true if (rold safe implies rcur safe) */ 8312 static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur, 8313 struct idpair *idmap) 8314 { 8315 bool equal; 8316 8317 if (!(rold->live & REG_LIVE_READ)) 8318 /* explored state didn't use this */ 8319 return true; 8320 8321 equal = memcmp(rold, rcur, offsetof(struct bpf_reg_state, parent)) == 0; 8322 8323 if (rold->type == PTR_TO_STACK) 8324 /* two stack pointers are equal only if they're pointing to 8325 * the same stack frame, since fp-8 in foo != fp-8 in bar 8326 */ 8327 return equal && rold->frameno == rcur->frameno; 8328 8329 if (equal) 8330 return true; 8331 8332 if (rold->type == NOT_INIT) 8333 /* explored state can't have used this */ 8334 return true; 8335 if (rcur->type == NOT_INIT) 8336 return false; 8337 switch (rold->type) { 8338 case SCALAR_VALUE: 8339 if (rcur->type == SCALAR_VALUE) { 8340 if (!rold->precise && !rcur->precise) 8341 return true; 8342 /* new val must satisfy old val knowledge */ 8343 return range_within(rold, rcur) && 8344 tnum_in(rold->var_off, rcur->var_off); 8345 } else { 8346 /* We're trying to use a pointer in place of a scalar. 8347 * Even if the scalar was unbounded, this could lead to 8348 * pointer leaks because scalars are allowed to leak 8349 * while pointers are not. We could make this safe in 8350 * special cases if root is calling us, but it's 8351 * probably not worth the hassle. 8352 */ 8353 return false; 8354 } 8355 case PTR_TO_MAP_VALUE: 8356 /* If the new min/max/var_off satisfy the old ones and 8357 * everything else matches, we are OK. 
8358 * 'id' is not compared, since it's only used for maps with 8359 * bpf_spin_lock inside map element and in such cases if 8360 * the rest of the prog is valid for one map element then 8361 * it's valid for all map elements regardless of the key 8362 * used in bpf_map_lookup() 8363 */ 8364 return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 && 8365 range_within(rold, rcur) && 8366 tnum_in(rold->var_off, rcur->var_off); 8367 case PTR_TO_MAP_VALUE_OR_NULL: 8368 /* a PTR_TO_MAP_VALUE could be safe to use as a 8369 * PTR_TO_MAP_VALUE_OR_NULL into the same map. 8370 * However, if the old PTR_TO_MAP_VALUE_OR_NULL then got NULL- 8371 * checked, doing so could have affected others with the same 8372 * id, and we can't check for that because we lost the id when 8373 * we converted to a PTR_TO_MAP_VALUE. 8374 */ 8375 if (rcur->type != PTR_TO_MAP_VALUE_OR_NULL) 8376 return false; 8377 if (memcmp(rold, rcur, offsetof(struct bpf_reg_state, id))) 8378 return false; 8379 /* Check our ids match any regs they're supposed to */ 8380 return check_ids(rold->id, rcur->id, idmap); 8381 case PTR_TO_PACKET_META: 8382 case PTR_TO_PACKET: 8383 if (rcur->type != rold->type) 8384 return false; 8385 /* We must have at least as much range as the old ptr 8386 * did, so that any accesses which were safe before are 8387 * still safe. This is true even if old range < old off, 8388 * since someone could have accessed through (ptr - k), or 8389 * even done ptr -= k in a register, to get a safe access. 8390 */ 8391 if (rold->range > rcur->range) 8392 return false; 8393 /* If the offsets don't match, we can't trust our alignment; 8394 * nor can we be sure that we won't fall out of range. 8395 */ 8396 if (rold->off != rcur->off) 8397 return false; 8398 /* id relations must be preserved */ 8399 if (rold->id && !check_ids(rold->id, rcur->id, idmap)) 8400 return false; 8401 /* new val must satisfy old val knowledge */ 8402 return range_within(rold, rcur) && 8403 tnum_in(rold->var_off, rcur->var_off); 8404 case PTR_TO_CTX: 8405 case CONST_PTR_TO_MAP: 8406 case PTR_TO_PACKET_END: 8407 case PTR_TO_FLOW_KEYS: 8408 case PTR_TO_SOCKET: 8409 case PTR_TO_SOCKET_OR_NULL: 8410 case PTR_TO_SOCK_COMMON: 8411 case PTR_TO_SOCK_COMMON_OR_NULL: 8412 case PTR_TO_TCP_SOCK: 8413 case PTR_TO_TCP_SOCK_OR_NULL: 8414 case PTR_TO_XDP_SOCK: 8415 /* Only valid matches are exact, which memcmp() above 8416 * would have accepted 8417 */ 8418 default: 8419 /* Don't know what's going on, just say it's not safe */ 8420 return false; 8421 } 8422 8423 /* Shouldn't get here; if we do, say it's not safe */ 8424 WARN_ON_ONCE(1); 8425 return false; 8426 } 8427 8428 static bool stacksafe(struct bpf_func_state *old, 8429 struct bpf_func_state *cur, 8430 struct idpair *idmap) 8431 { 8432 int i, spi; 8433 8434 /* walk slots of the explored stack and ignore any additional 8435 * slots in the current stack, since explored(safe) state 8436 * didn't use them 8437 */ 8438 for (i = 0; i < old->allocated_stack; i++) { 8439 spi = i / BPF_REG_SIZE; 8440 8441 if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ)) { 8442 i += BPF_REG_SIZE - 1; 8443 /* explored state didn't use this */ 8444 continue; 8445 } 8446 8447 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID) 8448 continue; 8449 8450 /* explored stack has more populated slots than current stack 8451 * and these slots were used 8452 */ 8453 if (i >= cur->allocated_stack) 8454 return false; 8455 8456 /* if old state was safe with misc data in the stack 8457 * it will be safe with 
zero-initialized stack. 8458 * The opposite is not true 8459 */ 8460 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC && 8461 cur->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_ZERO) 8462 continue; 8463 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] != 8464 cur->stack[spi].slot_type[i % BPF_REG_SIZE]) 8465 /* Ex: old explored (safe) state has STACK_SPILL in 8466 * this stack slot, but current has STACK_MISC -> 8467 * this verifier states are not equivalent, 8468 * return false to continue verification of this path 8469 */ 8470 return false; 8471 if (i % BPF_REG_SIZE) 8472 continue; 8473 if (old->stack[spi].slot_type[0] != STACK_SPILL) 8474 continue; 8475 if (!regsafe(&old->stack[spi].spilled_ptr, 8476 &cur->stack[spi].spilled_ptr, 8477 idmap)) 8478 /* when explored and current stack slot are both storing 8479 * spilled registers, check that stored pointers types 8480 * are the same as well. 8481 * Ex: explored safe path could have stored 8482 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8} 8483 * but current path has stored: 8484 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16} 8485 * such verifier states are not equivalent. 8486 * return false to continue verification of this path 8487 */ 8488 return false; 8489 } 8490 return true; 8491 } 8492 8493 static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur) 8494 { 8495 if (old->acquired_refs != cur->acquired_refs) 8496 return false; 8497 return !memcmp(old->refs, cur->refs, 8498 sizeof(*old->refs) * old->acquired_refs); 8499 } 8500 8501 /* compare two verifier states 8502 * 8503 * all states stored in state_list are known to be valid, since 8504 * verifier reached 'bpf_exit' instruction through them 8505 * 8506 * this function is called when verifier exploring different branches of 8507 * execution popped from the state stack. If it sees an old state that has 8508 * more strict register state and more strict stack state then this execution 8509 * branch doesn't need to be explored further, since verifier already 8510 * concluded that more strict state leads to valid finish. 8511 * 8512 * Therefore two states are equivalent if register state is more conservative 8513 * and explored stack state is more conservative than the current one. 8514 * Example: 8515 * explored current 8516 * (slot1=INV slot2=MISC) == (slot1=MISC slot2=MISC) 8517 * (slot1=MISC slot2=MISC) != (slot1=INV slot2=MISC) 8518 * 8519 * In other words if current stack state (one being explored) has more 8520 * valid slots than old one that already passed validation, it means 8521 * the verifier can stop exploring and conclude that current state is valid too 8522 * 8523 * Similarly with registers. 
If explored state has register type as invalid 8524 * whereas register type in current state is meaningful, it means that 8525 * the current state will reach 'bpf_exit' instruction safely 8526 */ 8527 static bool func_states_equal(struct bpf_func_state *old, 8528 struct bpf_func_state *cur) 8529 { 8530 struct idpair *idmap; 8531 bool ret = false; 8532 int i; 8533 8534 idmap = kcalloc(ID_MAP_SIZE, sizeof(struct idpair), GFP_KERNEL); 8535 /* If we failed to allocate the idmap, just say it's not safe */ 8536 if (!idmap) 8537 return false; 8538 8539 for (i = 0; i < MAX_BPF_REG; i++) { 8540 if (!regsafe(&old->regs[i], &cur->regs[i], idmap)) 8541 goto out_free; 8542 } 8543 8544 if (!stacksafe(old, cur, idmap)) 8545 goto out_free; 8546 8547 if (!refsafe(old, cur)) 8548 goto out_free; 8549 ret = true; 8550 out_free: 8551 kfree(idmap); 8552 return ret; 8553 } 8554 8555 static bool states_equal(struct bpf_verifier_env *env, 8556 struct bpf_verifier_state *old, 8557 struct bpf_verifier_state *cur) 8558 { 8559 int i; 8560 8561 if (old->curframe != cur->curframe) 8562 return false; 8563 8564 /* Verification state from speculative execution simulation 8565 * must never prune a non-speculative execution one. 8566 */ 8567 if (old->speculative && !cur->speculative) 8568 return false; 8569 8570 if (old->active_spin_lock != cur->active_spin_lock) 8571 return false; 8572 8573 /* for states to be equal callsites have to be the same 8574 * and all frame states need to be equivalent 8575 */ 8576 for (i = 0; i <= old->curframe; i++) { 8577 if (old->frame[i]->callsite != cur->frame[i]->callsite) 8578 return false; 8579 if (!func_states_equal(old->frame[i], cur->frame[i])) 8580 return false; 8581 } 8582 return true; 8583 } 8584 8585 /* Return 0 if no propagation happened. Return negative error code if error 8586 * happened. Otherwise, return the propagated bit. 8587 */ 8588 static int propagate_liveness_reg(struct bpf_verifier_env *env, 8589 struct bpf_reg_state *reg, 8590 struct bpf_reg_state *parent_reg) 8591 { 8592 u8 parent_flag = parent_reg->live & REG_LIVE_READ; 8593 u8 flag = reg->live & REG_LIVE_READ; 8594 int err; 8595 8596 /* When comes here, read flags of PARENT_REG or REG could be any of 8597 * REG_LIVE_READ64, REG_LIVE_READ32, REG_LIVE_NONE. There is no need 8598 * of propagation if PARENT_REG has strongest REG_LIVE_READ64. 8599 */ 8600 if (parent_flag == REG_LIVE_READ64 || 8601 /* Or if there is no read flag from REG. */ 8602 !flag || 8603 /* Or if the read flag from REG is the same as PARENT_REG. */ 8604 parent_flag == flag) 8605 return 0; 8606 8607 err = mark_reg_read(env, reg, parent_reg, flag); 8608 if (err) 8609 return err; 8610 8611 return flag; 8612 } 8613 8614 /* A write screens off any subsequent reads; but write marks come from the 8615 * straight-line code between a state and its parent. When we arrive at an 8616 * equivalent state (jump target or such) we didn't arrive by the straight-line 8617 * code, so read marks in the state must propagate to the parent regardless 8618 * of the state's write marks. That's what 'parent == state->parent' comparison 8619 * in mark_reg_read() is for. 
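 * Added illustration (sketch): if the already-explored equivalent state
 * recorded a read of r6 somewhere in its continuation, that read must
 * also be reflected in the state we are about to prune, otherwise a
 * write to r6 on our own path could wrongly screen the read off from
 * our ancestors; propagate_liveness() below copies those read marks.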
8620 */ 8621 static int propagate_liveness(struct bpf_verifier_env *env, 8622 const struct bpf_verifier_state *vstate, 8623 struct bpf_verifier_state *vparent) 8624 { 8625 struct bpf_reg_state *state_reg, *parent_reg; 8626 struct bpf_func_state *state, *parent; 8627 int i, frame, err = 0; 8628 8629 if (vparent->curframe != vstate->curframe) { 8630 WARN(1, "propagate_live: parent frame %d current frame %d\n", 8631 vparent->curframe, vstate->curframe); 8632 return -EFAULT; 8633 } 8634 /* Propagate read liveness of registers... */ 8635 BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG); 8636 for (frame = 0; frame <= vstate->curframe; frame++) { 8637 parent = vparent->frame[frame]; 8638 state = vstate->frame[frame]; 8639 parent_reg = parent->regs; 8640 state_reg = state->regs; 8641 /* We don't need to worry about FP liveness, it's read-only */ 8642 for (i = frame < vstate->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) { 8643 err = propagate_liveness_reg(env, &state_reg[i], 8644 &parent_reg[i]); 8645 if (err < 0) 8646 return err; 8647 if (err == REG_LIVE_READ64) 8648 mark_insn_zext(env, &parent_reg[i]); 8649 } 8650 8651 /* Propagate stack slots. */ 8652 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE && 8653 i < parent->allocated_stack / BPF_REG_SIZE; i++) { 8654 parent_reg = &parent->stack[i].spilled_ptr; 8655 state_reg = &state->stack[i].spilled_ptr; 8656 err = propagate_liveness_reg(env, state_reg, 8657 parent_reg); 8658 if (err < 0) 8659 return err; 8660 } 8661 } 8662 return 0; 8663 } 8664 8665 /* find precise scalars in the previous equivalent state and 8666 * propagate them into the current state 8667 */ 8668 static int propagate_precision(struct bpf_verifier_env *env, 8669 const struct bpf_verifier_state *old) 8670 { 8671 struct bpf_reg_state *state_reg; 8672 struct bpf_func_state *state; 8673 int i, err = 0; 8674 8675 state = old->frame[old->curframe]; 8676 state_reg = state->regs; 8677 for (i = 0; i < BPF_REG_FP; i++, state_reg++) { 8678 if (state_reg->type != SCALAR_VALUE || 8679 !state_reg->precise) 8680 continue; 8681 if (env->log.level & BPF_LOG_LEVEL2) 8682 verbose(env, "propagating r%d\n", i); 8683 err = mark_chain_precision(env, i); 8684 if (err < 0) 8685 return err; 8686 } 8687 8688 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) { 8689 if (state->stack[i].slot_type[0] != STACK_SPILL) 8690 continue; 8691 state_reg = &state->stack[i].spilled_ptr; 8692 if (state_reg->type != SCALAR_VALUE || 8693 !state_reg->precise) 8694 continue; 8695 if (env->log.level & BPF_LOG_LEVEL2) 8696 verbose(env, "propagating fp%d\n", 8697 (-i - 1) * BPF_REG_SIZE); 8698 err = mark_chain_precision_stack(env, i); 8699 if (err < 0) 8700 return err; 8701 } 8702 return 0; 8703 } 8704 8705 static bool states_maybe_looping(struct bpf_verifier_state *old, 8706 struct bpf_verifier_state *cur) 8707 { 8708 struct bpf_func_state *fold, *fcur; 8709 int i, fr = cur->curframe; 8710 8711 if (old->curframe != fr) 8712 return false; 8713 8714 fold = old->frame[fr]; 8715 fcur = cur->frame[fr]; 8716 for (i = 0; i < MAX_BPF_REG; i++) 8717 if (memcmp(&fold->regs[i], &fcur->regs[i], 8718 offsetof(struct bpf_reg_state, parent))) 8719 return false; 8720 return true; 8721 } 8722 8723 8724 static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) 8725 { 8726 struct bpf_verifier_state_list *new_sl; 8727 struct bpf_verifier_state_list *sl, **pprev; 8728 struct bpf_verifier_state *cur = env->cur_state, *new; 8729 int i, j, err, states_cnt = 0; 8730 bool add_new_state = env->test_state_freq ? 
true : false; 8731 8732 cur->last_insn_idx = env->prev_insn_idx; 8733 if (!env->insn_aux_data[insn_idx].prune_point) 8734 /* this 'insn_idx' instruction wasn't marked, so we will not 8735 * be doing state search here 8736 */ 8737 return 0; 8738 8739 /* bpf progs typically have pruning point every 4 instructions 8740 * http://vger.kernel.org/bpfconf2019.html#session-1 8741 * Do not add new state for future pruning if the verifier hasn't seen 8742 * at least 2 jumps and at least 8 instructions. 8743 * This heuristics helps decrease 'total_states' and 'peak_states' metric. 8744 * In tests that amounts to up to 50% reduction into total verifier 8745 * memory consumption and 20% verifier time speedup. 8746 */ 8747 if (env->jmps_processed - env->prev_jmps_processed >= 2 && 8748 env->insn_processed - env->prev_insn_processed >= 8) 8749 add_new_state = true; 8750 8751 pprev = explored_state(env, insn_idx); 8752 sl = *pprev; 8753 8754 clean_live_states(env, insn_idx, cur); 8755 8756 while (sl) { 8757 states_cnt++; 8758 if (sl->state.insn_idx != insn_idx) 8759 goto next; 8760 if (sl->state.branches) { 8761 if (states_maybe_looping(&sl->state, cur) && 8762 states_equal(env, &sl->state, cur)) { 8763 verbose_linfo(env, insn_idx, "; "); 8764 verbose(env, "infinite loop detected at insn %d\n", insn_idx); 8765 return -EINVAL; 8766 } 8767 /* if the verifier is processing a loop, avoid adding new state 8768 * too often, since different loop iterations have distinct 8769 * states and may not help future pruning. 8770 * This threshold shouldn't be too low to make sure that 8771 * a loop with large bound will be rejected quickly. 8772 * The most abusive loop will be: 8773 * r1 += 1 8774 * if r1 < 1000000 goto pc-2 8775 * 1M insn_procssed limit / 100 == 10k peak states. 8776 * This threshold shouldn't be too high either, since states 8777 * at the end of the loop are likely to be useful in pruning. 8778 */ 8779 if (env->jmps_processed - env->prev_jmps_processed < 20 && 8780 env->insn_processed - env->prev_insn_processed < 100) 8781 add_new_state = false; 8782 goto miss; 8783 } 8784 if (states_equal(env, &sl->state, cur)) { 8785 sl->hit_cnt++; 8786 /* reached equivalent register/stack state, 8787 * prune the search. 8788 * Registers read by the continuation are read by us. 8789 * If we have any write marks in env->cur_state, they 8790 * will prevent corresponding reads in the continuation 8791 * from reaching our parent (an explored_state). Our 8792 * own state will get the read marks recorded, but 8793 * they'll be immediately forgotten as we're pruning 8794 * this state and will pop a new one. 8795 */ 8796 err = propagate_liveness(env, &sl->state, cur); 8797 8798 /* if previous state reached the exit with precision and 8799 * current state is equivalent to it (except precsion marks) 8800 * the precision needs to be propagated back in 8801 * the current state. 8802 */ 8803 err = err ? : push_jmp_history(env, cur); 8804 err = err ? : propagate_precision(env, &sl->state); 8805 if (err) 8806 return err; 8807 return 1; 8808 } 8809 miss: 8810 /* when new state is not going to be added do not increase miss count. 8811 * Otherwise several loop iterations will remove the state 8812 * recorded earlier. The goal of these heuristics is to have 8813 * states from some iterations of the loop (some in the beginning 8814 * and some at the end) to help pruning. 
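 * Added numeric example (illustrative): with the
 * "miss_cnt > hit_cnt * 3 + 3" test below, a state that never pruned
 * anything (hit_cnt == 0) is dropped after its 4th miss, while a state
 * with hit_cnt == 3 survives until its 13th miss.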
8815 */ 8816 if (add_new_state) 8817 sl->miss_cnt++; 8818 /* heuristic to determine whether this state is beneficial 8819 * to keep checking from state equivalence point of view. 8820 * Higher numbers increase max_states_per_insn and verification time, 8821 * but do not meaningfully decrease insn_processed. 8822 */ 8823 if (sl->miss_cnt > sl->hit_cnt * 3 + 3) { 8824 /* the state is unlikely to be useful. Remove it to 8825 * speed up verification 8826 */ 8827 *pprev = sl->next; 8828 if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE) { 8829 u32 br = sl->state.branches; 8830 8831 WARN_ONCE(br, 8832 "BUG live_done but branches_to_explore %d\n", 8833 br); 8834 free_verifier_state(&sl->state, false); 8835 kfree(sl); 8836 env->peak_states--; 8837 } else { 8838 /* cannot free this state, since parentage chain may 8839 * walk it later. Add it for free_list instead to 8840 * be freed at the end of verification 8841 */ 8842 sl->next = env->free_list; 8843 env->free_list = sl; 8844 } 8845 sl = *pprev; 8846 continue; 8847 } 8848 next: 8849 pprev = &sl->next; 8850 sl = *pprev; 8851 } 8852 8853 if (env->max_states_per_insn < states_cnt) 8854 env->max_states_per_insn = states_cnt; 8855 8856 if (!env->bpf_capable && states_cnt > BPF_COMPLEXITY_LIMIT_STATES) 8857 return push_jmp_history(env, cur); 8858 8859 if (!add_new_state) 8860 return push_jmp_history(env, cur); 8861 8862 /* There were no equivalent states, remember the current one. 8863 * Technically the current state is not proven to be safe yet, 8864 * but it will either reach outer most bpf_exit (which means it's safe) 8865 * or it will be rejected. When there are no loops the verifier won't be 8866 * seeing this tuple (frame[0].callsite, frame[1].callsite, .. insn_idx) 8867 * again on the way to bpf_exit. 8868 * When looping the sl->state.branches will be > 0 and this state 8869 * will not be considered for equivalence until branches == 0. 8870 */ 8871 new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL); 8872 if (!new_sl) 8873 return -ENOMEM; 8874 env->total_states++; 8875 env->peak_states++; 8876 env->prev_jmps_processed = env->jmps_processed; 8877 env->prev_insn_processed = env->insn_processed; 8878 8879 /* add new state to the head of linked list */ 8880 new = &new_sl->state; 8881 err = copy_verifier_state(new, cur); 8882 if (err) { 8883 free_verifier_state(new, false); 8884 kfree(new_sl); 8885 return err; 8886 } 8887 new->insn_idx = insn_idx; 8888 WARN_ONCE(new->branches != 1, 8889 "BUG is_state_visited:branches_to_explore=%d insn %d\n", new->branches, insn_idx); 8890 8891 cur->parent = new; 8892 cur->first_insn_idx = insn_idx; 8893 clear_jmp_history(cur); 8894 new_sl->next = *explored_state(env, insn_idx); 8895 *explored_state(env, insn_idx) = new_sl; 8896 /* connect new state to parentage chain. Current frame needs all 8897 * registers connected. Only r6 - r9 of the callers are alive (pushed 8898 * to the stack implicitly by JITs) so in callers' frames connect just 8899 * r6 - r9 as an optimization. Callers will have r1 - r5 connected to 8900 * the state of the call instruction (with WRITTEN set), and r0 comes 8901 * from callee with its full parentage chain, anyway. 8902 */ 8903 /* clear write marks in current state: the writes we did are not writes 8904 * our child did, so they don't screen off its reads from us. 8905 * (There are no read marks in current state, because reads always mark 8906 * their parent and current state never has children yet. Only 8907 * explored_states can get read marks.) 
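 * Added illustration (sketch): any REG_LIVE_WRITTEN flags accumulated
 * in cur before this snapshot describe writes done before the new
 * parent state existed; left in place they would stop later reads in
 * cur from propagating to new->frame[j]->regs[i], which is why the
 * loops below reset all live flags to REG_LIVE_NONE.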
8908 */ 8909 for (j = 0; j <= cur->curframe; j++) { 8910 for (i = j < cur->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) 8911 cur->frame[j]->regs[i].parent = &new->frame[j]->regs[i]; 8912 for (i = 0; i < BPF_REG_FP; i++) 8913 cur->frame[j]->regs[i].live = REG_LIVE_NONE; 8914 } 8915 8916 /* all stack frames are accessible from callee, clear them all */ 8917 for (j = 0; j <= cur->curframe; j++) { 8918 struct bpf_func_state *frame = cur->frame[j]; 8919 struct bpf_func_state *newframe = new->frame[j]; 8920 8921 for (i = 0; i < frame->allocated_stack / BPF_REG_SIZE; i++) { 8922 frame->stack[i].spilled_ptr.live = REG_LIVE_NONE; 8923 frame->stack[i].spilled_ptr.parent = 8924 &newframe->stack[i].spilled_ptr; 8925 } 8926 } 8927 return 0; 8928 } 8929 8930 /* Return true if it's OK to have the same insn return a different type. */ 8931 static bool reg_type_mismatch_ok(enum bpf_reg_type type) 8932 { 8933 switch (type) { 8934 case PTR_TO_CTX: 8935 case PTR_TO_SOCKET: 8936 case PTR_TO_SOCKET_OR_NULL: 8937 case PTR_TO_SOCK_COMMON: 8938 case PTR_TO_SOCK_COMMON_OR_NULL: 8939 case PTR_TO_TCP_SOCK: 8940 case PTR_TO_TCP_SOCK_OR_NULL: 8941 case PTR_TO_XDP_SOCK: 8942 case PTR_TO_BTF_ID: 8943 case PTR_TO_BTF_ID_OR_NULL: 8944 return false; 8945 default: 8946 return true; 8947 } 8948 } 8949 8950 /* If an instruction was previously used with particular pointer types, then we 8951 * need to be careful to avoid cases such as the below, where it may be ok 8952 * for one branch accessing the pointer, but not ok for the other branch: 8953 * 8954 * R1 = sock_ptr 8955 * goto X; 8956 * ... 8957 * R1 = some_other_valid_ptr; 8958 * goto X; 8959 * ... 8960 * R2 = *(u32 *)(R1 + 0); 8961 */ 8962 static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev) 8963 { 8964 return src != prev && (!reg_type_mismatch_ok(src) || 8965 !reg_type_mismatch_ok(prev)); 8966 } 8967 8968 static int do_check(struct bpf_verifier_env *env) 8969 { 8970 bool pop_log = !(env->log.level & BPF_LOG_LEVEL2); 8971 struct bpf_verifier_state *state = env->cur_state; 8972 struct bpf_insn *insns = env->prog->insnsi; 8973 struct bpf_reg_state *regs; 8974 int insn_cnt = env->prog->len; 8975 bool do_print_state = false; 8976 int prev_insn_idx = -1; 8977 8978 for (;;) { 8979 struct bpf_insn *insn; 8980 u8 class; 8981 int err; 8982 8983 env->prev_insn_idx = prev_insn_idx; 8984 if (env->insn_idx >= insn_cnt) { 8985 verbose(env, "invalid insn idx %d insn_cnt %d\n", 8986 env->insn_idx, insn_cnt); 8987 return -EFAULT; 8988 } 8989 8990 insn = &insns[env->insn_idx]; 8991 class = BPF_CLASS(insn->code); 8992 8993 if (++env->insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) { 8994 verbose(env, 8995 "BPF program is too large. Processed %d insn\n", 8996 env->insn_processed); 8997 return -E2BIG; 8998 } 8999 9000 err = is_state_visited(env, env->insn_idx); 9001 if (err < 0) 9002 return err; 9003 if (err == 1) { 9004 /* found equivalent state, can prune the search */ 9005 if (env->log.level & BPF_LOG_LEVEL) { 9006 if (do_print_state) 9007 verbose(env, "\nfrom %d to %d%s: safe\n", 9008 env->prev_insn_idx, env->insn_idx, 9009 env->cur_state->speculative ? 
9010 " (speculative execution)" : ""); 9011 else 9012 verbose(env, "%d: safe\n", env->insn_idx); 9013 } 9014 goto process_bpf_exit; 9015 } 9016 9017 if (signal_pending(current)) 9018 return -EAGAIN; 9019 9020 if (need_resched()) 9021 cond_resched(); 9022 9023 if (env->log.level & BPF_LOG_LEVEL2 || 9024 (env->log.level & BPF_LOG_LEVEL && do_print_state)) { 9025 if (env->log.level & BPF_LOG_LEVEL2) 9026 verbose(env, "%d:", env->insn_idx); 9027 else 9028 verbose(env, "\nfrom %d to %d%s:", 9029 env->prev_insn_idx, env->insn_idx, 9030 env->cur_state->speculative ? 9031 " (speculative execution)" : ""); 9032 print_verifier_state(env, state->frame[state->curframe]); 9033 do_print_state = false; 9034 } 9035 9036 if (env->log.level & BPF_LOG_LEVEL) { 9037 const struct bpf_insn_cbs cbs = { 9038 .cb_print = verbose, 9039 .private_data = env, 9040 }; 9041 9042 verbose_linfo(env, env->insn_idx, "; "); 9043 verbose(env, "%d: ", env->insn_idx); 9044 print_bpf_insn(&cbs, insn, env->allow_ptr_leaks); 9045 } 9046 9047 if (bpf_prog_is_dev_bound(env->prog->aux)) { 9048 err = bpf_prog_offload_verify_insn(env, env->insn_idx, 9049 env->prev_insn_idx); 9050 if (err) 9051 return err; 9052 } 9053 9054 regs = cur_regs(env); 9055 env->insn_aux_data[env->insn_idx].seen = env->pass_cnt; 9056 prev_insn_idx = env->insn_idx; 9057 9058 if (class == BPF_ALU || class == BPF_ALU64) { 9059 err = check_alu_op(env, insn); 9060 if (err) 9061 return err; 9062 9063 } else if (class == BPF_LDX) { 9064 enum bpf_reg_type *prev_src_type, src_reg_type; 9065 9066 /* check for reserved fields is already done */ 9067 9068 /* check src operand */ 9069 err = check_reg_arg(env, insn->src_reg, SRC_OP); 9070 if (err) 9071 return err; 9072 9073 err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK); 9074 if (err) 9075 return err; 9076 9077 src_reg_type = regs[insn->src_reg].type; 9078 9079 /* check that memory (src_reg + off) is readable, 9080 * the state of dst_reg will be updated by this func 9081 */ 9082 err = check_mem_access(env, env->insn_idx, insn->src_reg, 9083 insn->off, BPF_SIZE(insn->code), 9084 BPF_READ, insn->dst_reg, false); 9085 if (err) 9086 return err; 9087 9088 prev_src_type = &env->insn_aux_data[env->insn_idx].ptr_type; 9089 9090 if (*prev_src_type == NOT_INIT) { 9091 /* saw a valid insn 9092 * dst_reg = *(u32 *)(src_reg + off) 9093 * save type to validate intersecting paths 9094 */ 9095 *prev_src_type = src_reg_type; 9096 9097 } else if (reg_type_mismatch(src_reg_type, *prev_src_type)) { 9098 /* ABuser program is trying to use the same insn 9099 * dst_reg = *(u32*) (src_reg + off) 9100 * with different pointer types: 9101 * src_reg == ctx in one branch and 9102 * src_reg == stack|map in some other branch. 9103 * Reject it. 
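 *
 * Illustrative note: the mix cannot be allowed because a PTR_TO_CTX
 * load is later rewritten by convert_ctx_accesses() into a different
 * instruction sequence (a field load from the underlying kernel
 * struct) than a plain PTR_TO_STACK or map value load, and one
 * physical insn cannot carry both rewrites.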
9104 */ 9105 verbose(env, "same insn cannot be used with different pointers\n"); 9106 return -EINVAL; 9107 } 9108 9109 } else if (class == BPF_STX) { 9110 enum bpf_reg_type *prev_dst_type, dst_reg_type; 9111 9112 if (BPF_MODE(insn->code) == BPF_XADD) { 9113 err = check_xadd(env, env->insn_idx, insn); 9114 if (err) 9115 return err; 9116 env->insn_idx++; 9117 continue; 9118 } 9119 9120 /* check src1 operand */ 9121 err = check_reg_arg(env, insn->src_reg, SRC_OP); 9122 if (err) 9123 return err; 9124 /* check src2 operand */ 9125 err = check_reg_arg(env, insn->dst_reg, SRC_OP); 9126 if (err) 9127 return err; 9128 9129 dst_reg_type = regs[insn->dst_reg].type; 9130 9131 /* check that memory (dst_reg + off) is writeable */ 9132 err = check_mem_access(env, env->insn_idx, insn->dst_reg, 9133 insn->off, BPF_SIZE(insn->code), 9134 BPF_WRITE, insn->src_reg, false); 9135 if (err) 9136 return err; 9137 9138 prev_dst_type = &env->insn_aux_data[env->insn_idx].ptr_type; 9139 9140 if (*prev_dst_type == NOT_INIT) { 9141 *prev_dst_type = dst_reg_type; 9142 } else if (reg_type_mismatch(dst_reg_type, *prev_dst_type)) { 9143 verbose(env, "same insn cannot be used with different pointers\n"); 9144 return -EINVAL; 9145 } 9146 9147 } else if (class == BPF_ST) { 9148 if (BPF_MODE(insn->code) != BPF_MEM || 9149 insn->src_reg != BPF_REG_0) { 9150 verbose(env, "BPF_ST uses reserved fields\n"); 9151 return -EINVAL; 9152 } 9153 /* check src operand */ 9154 err = check_reg_arg(env, insn->dst_reg, SRC_OP); 9155 if (err) 9156 return err; 9157 9158 if (is_ctx_reg(env, insn->dst_reg)) { 9159 verbose(env, "BPF_ST stores into R%d %s is not allowed\n", 9160 insn->dst_reg, 9161 reg_type_str[reg_state(env, insn->dst_reg)->type]); 9162 return -EACCES; 9163 } 9164 9165 /* check that memory (dst_reg + off) is writeable */ 9166 err = check_mem_access(env, env->insn_idx, insn->dst_reg, 9167 insn->off, BPF_SIZE(insn->code), 9168 BPF_WRITE, -1, false); 9169 if (err) 9170 return err; 9171 9172 } else if (class == BPF_JMP || class == BPF_JMP32) { 9173 u8 opcode = BPF_OP(insn->code); 9174 9175 env->jmps_processed++; 9176 if (opcode == BPF_CALL) { 9177 if (BPF_SRC(insn->code) != BPF_K || 9178 insn->off != 0 || 9179 (insn->src_reg != BPF_REG_0 && 9180 insn->src_reg != BPF_PSEUDO_CALL) || 9181 insn->dst_reg != BPF_REG_0 || 9182 class == BPF_JMP32) { 9183 verbose(env, "BPF_CALL uses reserved fields\n"); 9184 return -EINVAL; 9185 } 9186 9187 if (env->cur_state->active_spin_lock && 9188 (insn->src_reg == BPF_PSEUDO_CALL || 9189 insn->imm != BPF_FUNC_spin_unlock)) { 9190 verbose(env, "function calls are not allowed while holding a lock\n"); 9191 return -EINVAL; 9192 } 9193 if (insn->src_reg == BPF_PSEUDO_CALL) 9194 err = check_func_call(env, insn, &env->insn_idx); 9195 else 9196 err = check_helper_call(env, insn->imm, env->insn_idx); 9197 if (err) 9198 return err; 9199 9200 } else if (opcode == BPF_JA) { 9201 if (BPF_SRC(insn->code) != BPF_K || 9202 insn->imm != 0 || 9203 insn->src_reg != BPF_REG_0 || 9204 insn->dst_reg != BPF_REG_0 || 9205 class == BPF_JMP32) { 9206 verbose(env, "BPF_JA uses reserved fields\n"); 9207 return -EINVAL; 9208 } 9209 9210 env->insn_idx += insn->off + 1; 9211 continue; 9212 9213 } else if (opcode == BPF_EXIT) { 9214 if (BPF_SRC(insn->code) != BPF_K || 9215 insn->imm != 0 || 9216 insn->src_reg != BPF_REG_0 || 9217 insn->dst_reg != BPF_REG_0 || 9218 class == BPF_JMP32) { 9219 verbose(env, "BPF_EXIT uses reserved fields\n"); 9220 return -EINVAL; 9221 } 9222 9223 if (env->cur_state->active_spin_lock) { 9224 verbose(env, 
"bpf_spin_unlock is missing\n"); 9225 return -EINVAL; 9226 } 9227 9228 if (state->curframe) { 9229 /* exit from nested function */ 9230 err = prepare_func_exit(env, &env->insn_idx); 9231 if (err) 9232 return err; 9233 do_print_state = true; 9234 continue; 9235 } 9236 9237 err = check_reference_leak(env); 9238 if (err) 9239 return err; 9240 9241 err = check_return_code(env); 9242 if (err) 9243 return err; 9244 process_bpf_exit: 9245 update_branch_counts(env, env->cur_state); 9246 err = pop_stack(env, &prev_insn_idx, 9247 &env->insn_idx, pop_log); 9248 if (err < 0) { 9249 if (err != -ENOENT) 9250 return err; 9251 break; 9252 } else { 9253 do_print_state = true; 9254 continue; 9255 } 9256 } else { 9257 err = check_cond_jmp_op(env, insn, &env->insn_idx); 9258 if (err) 9259 return err; 9260 } 9261 } else if (class == BPF_LD) { 9262 u8 mode = BPF_MODE(insn->code); 9263 9264 if (mode == BPF_ABS || mode == BPF_IND) { 9265 err = check_ld_abs(env, insn); 9266 if (err) 9267 return err; 9268 9269 } else if (mode == BPF_IMM) { 9270 err = check_ld_imm(env, insn); 9271 if (err) 9272 return err; 9273 9274 env->insn_idx++; 9275 env->insn_aux_data[env->insn_idx].seen = env->pass_cnt; 9276 } else { 9277 verbose(env, "invalid BPF_LD mode\n"); 9278 return -EINVAL; 9279 } 9280 } else { 9281 verbose(env, "unknown insn class %d\n", class); 9282 return -EINVAL; 9283 } 9284 9285 env->insn_idx++; 9286 } 9287 9288 return 0; 9289 } 9290 9291 static int check_map_prealloc(struct bpf_map *map) 9292 { 9293 return (map->map_type != BPF_MAP_TYPE_HASH && 9294 map->map_type != BPF_MAP_TYPE_PERCPU_HASH && 9295 map->map_type != BPF_MAP_TYPE_HASH_OF_MAPS) || 9296 !(map->map_flags & BPF_F_NO_PREALLOC); 9297 } 9298 9299 static bool is_tracing_prog_type(enum bpf_prog_type type) 9300 { 9301 switch (type) { 9302 case BPF_PROG_TYPE_KPROBE: 9303 case BPF_PROG_TYPE_TRACEPOINT: 9304 case BPF_PROG_TYPE_PERF_EVENT: 9305 case BPF_PROG_TYPE_RAW_TRACEPOINT: 9306 return true; 9307 default: 9308 return false; 9309 } 9310 } 9311 9312 static bool is_preallocated_map(struct bpf_map *map) 9313 { 9314 if (!check_map_prealloc(map)) 9315 return false; 9316 if (map->inner_map_meta && !check_map_prealloc(map->inner_map_meta)) 9317 return false; 9318 return true; 9319 } 9320 9321 static int check_map_prog_compatibility(struct bpf_verifier_env *env, 9322 struct bpf_map *map, 9323 struct bpf_prog *prog) 9324 9325 { 9326 enum bpf_prog_type prog_type = resolve_prog_type(prog); 9327 /* 9328 * Validate that trace type programs use preallocated hash maps. 9329 * 9330 * For programs attached to PERF events this is mandatory as the 9331 * perf NMI can hit any arbitrary code sequence. 9332 * 9333 * All other trace types using preallocated hash maps are unsafe as 9334 * well because tracepoint or kprobes can be inside locked regions 9335 * of the memory allocator or at a place where a recursion into the 9336 * memory allocator would see inconsistent state. 9337 * 9338 * On RT enabled kernels run-time allocation of all trace type 9339 * programs is strictly prohibited due to lock type constraints. On 9340 * !RT kernels it is allowed for backwards compatibility reasons for 9341 * now, but warnings are emitted so developers are made aware of 9342 * the unsafety and can fix their programs before this is enforced. 
9343 */ 9344 if (is_tracing_prog_type(prog_type) && !is_preallocated_map(map)) { 9345 if (prog_type == BPF_PROG_TYPE_PERF_EVENT) { 9346 verbose(env, "perf_event programs can only use preallocated hash map\n"); 9347 return -EINVAL; 9348 } 9349 if (IS_ENABLED(CONFIG_PREEMPT_RT)) { 9350 verbose(env, "trace type programs can only use preallocated hash map\n"); 9351 return -EINVAL; 9352 } 9353 WARN_ONCE(1, "trace type BPF program uses run-time allocation\n"); 9354 verbose(env, "trace type programs with run-time allocated hash maps are unsafe. Switch to preallocated hash maps.\n"); 9355 } 9356 9357 if ((is_tracing_prog_type(prog_type) || 9358 prog_type == BPF_PROG_TYPE_SOCKET_FILTER) && 9359 map_value_has_spin_lock(map)) { 9360 verbose(env, "tracing progs cannot use bpf_spin_lock yet\n"); 9361 return -EINVAL; 9362 } 9363 9364 if ((bpf_prog_is_dev_bound(prog->aux) || bpf_map_is_dev_bound(map)) && 9365 !bpf_offload_prog_map_match(prog, map)) { 9366 verbose(env, "offload device mismatch between prog and map\n"); 9367 return -EINVAL; 9368 } 9369 9370 if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) { 9371 verbose(env, "bpf_struct_ops map cannot be used in prog\n"); 9372 return -EINVAL; 9373 } 9374 9375 if (prog->aux->sleepable) 9376 switch (map->map_type) { 9377 case BPF_MAP_TYPE_HASH: 9378 case BPF_MAP_TYPE_LRU_HASH: 9379 case BPF_MAP_TYPE_ARRAY: 9380 if (!is_preallocated_map(map)) { 9381 verbose(env, 9382 "Sleepable programs can only use preallocated hash maps\n"); 9383 return -EINVAL; 9384 } 9385 break; 9386 default: 9387 verbose(env, 9388 "Sleepable programs can only use array and hash maps\n"); 9389 return -EINVAL; 9390 } 9391 9392 return 0; 9393 } 9394 9395 static bool bpf_map_is_cgroup_storage(struct bpf_map *map) 9396 { 9397 return (map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE || 9398 map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE); 9399 } 9400 9401 /* look for pseudo eBPF instructions that access map FDs and 9402 * replace them with actual map pointers 9403 */ 9404 static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env) 9405 { 9406 struct bpf_insn *insn = env->prog->insnsi; 9407 int insn_cnt = env->prog->len; 9408 int i, j, err; 9409 9410 err = bpf_prog_calc_tag(env->prog); 9411 if (err) 9412 return err; 9413 9414 for (i = 0; i < insn_cnt; i++, insn++) { 9415 if (BPF_CLASS(insn->code) == BPF_LDX && 9416 (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0)) { 9417 verbose(env, "BPF_LDX uses reserved fields\n"); 9418 return -EINVAL; 9419 } 9420 9421 if (BPF_CLASS(insn->code) == BPF_STX && 9422 ((BPF_MODE(insn->code) != BPF_MEM && 9423 BPF_MODE(insn->code) != BPF_XADD) || insn->imm != 0)) { 9424 verbose(env, "BPF_STX uses reserved fields\n"); 9425 return -EINVAL; 9426 } 9427 9428 if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) { 9429 struct bpf_insn_aux_data *aux; 9430 struct bpf_map *map; 9431 struct fd f; 9432 u64 addr; 9433 9434 if (i == insn_cnt - 1 || insn[1].code != 0 || 9435 insn[1].dst_reg != 0 || insn[1].src_reg != 0 || 9436 insn[1].off != 0) { 9437 verbose(env, "invalid bpf_ld_imm64 insn\n"); 9438 return -EINVAL; 9439 } 9440 9441 if (insn[0].src_reg == 0) 9442 /* valid generic load 64-bit imm */ 9443 goto next_insn; 9444 9445 /* In final convert_pseudo_ld_imm64() step, this is 9446 * converted into regular 64-bit imm load insn. 
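 *
 * Sketch of the encoding validated below (illustrative field view):
 *   insn[0]: code = BPF_LD | BPF_IMM | BPF_DW,
 *            src_reg = BPF_PSEUDO_MAP_FD (or BPF_PSEUDO_MAP_VALUE),
 *            imm = user-supplied map fd
 *   insn[1]: all fields zero, except imm, which must be 0 for
 *            BPF_PSEUDO_MAP_FD and holds the value offset for
 *            BPF_PSEUDO_MAP_VALUE
 * Once the fd is resolved, insn[0].imm and insn[1].imm are overwritten
 * with the low/high 32 bits of the map (or map value) address.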
9447 */ 9448 if ((insn[0].src_reg != BPF_PSEUDO_MAP_FD && 9449 insn[0].src_reg != BPF_PSEUDO_MAP_VALUE) || 9450 (insn[0].src_reg == BPF_PSEUDO_MAP_FD && 9451 insn[1].imm != 0)) { 9452 verbose(env, 9453 "unrecognized bpf_ld_imm64 insn\n"); 9454 return -EINVAL; 9455 } 9456 9457 f = fdget(insn[0].imm); 9458 map = __bpf_map_get(f); 9459 if (IS_ERR(map)) { 9460 verbose(env, "fd %d is not pointing to valid bpf_map\n", 9461 insn[0].imm); 9462 return PTR_ERR(map); 9463 } 9464 9465 err = check_map_prog_compatibility(env, map, env->prog); 9466 if (err) { 9467 fdput(f); 9468 return err; 9469 } 9470 9471 aux = &env->insn_aux_data[i]; 9472 if (insn->src_reg == BPF_PSEUDO_MAP_FD) { 9473 addr = (unsigned long)map; 9474 } else { 9475 u32 off = insn[1].imm; 9476 9477 if (off >= BPF_MAX_VAR_OFF) { 9478 verbose(env, "direct value offset of %u is not allowed\n", off); 9479 fdput(f); 9480 return -EINVAL; 9481 } 9482 9483 if (!map->ops->map_direct_value_addr) { 9484 verbose(env, "no direct value access support for this map type\n"); 9485 fdput(f); 9486 return -EINVAL; 9487 } 9488 9489 err = map->ops->map_direct_value_addr(map, &addr, off); 9490 if (err) { 9491 verbose(env, "invalid access to map value pointer, value_size=%u off=%u\n", 9492 map->value_size, off); 9493 fdput(f); 9494 return err; 9495 } 9496 9497 aux->map_off = off; 9498 addr += off; 9499 } 9500 9501 insn[0].imm = (u32)addr; 9502 insn[1].imm = addr >> 32; 9503 9504 /* check whether we recorded this map already */ 9505 for (j = 0; j < env->used_map_cnt; j++) { 9506 if (env->used_maps[j] == map) { 9507 aux->map_index = j; 9508 fdput(f); 9509 goto next_insn; 9510 } 9511 } 9512 9513 if (env->used_map_cnt >= MAX_USED_MAPS) { 9514 fdput(f); 9515 return -E2BIG; 9516 } 9517 9518 /* hold the map. If the program is rejected by verifier, 9519 * the map will be released by release_maps() or it 9520 * will be used by the valid program until it's unloaded 9521 * and all maps are released in free_used_maps() 9522 */ 9523 bpf_map_inc(map); 9524 9525 aux->map_index = env->used_map_cnt; 9526 env->used_maps[env->used_map_cnt++] = map; 9527 9528 if (bpf_map_is_cgroup_storage(map) && 9529 bpf_cgroup_storage_assign(env->prog->aux, map)) { 9530 verbose(env, "only one cgroup storage of each type is allowed\n"); 9531 fdput(f); 9532 return -EBUSY; 9533 } 9534 9535 fdput(f); 9536 next_insn: 9537 insn++; 9538 i++; 9539 continue; 9540 } 9541 9542 /* Basic sanity check before we invest more work here. */ 9543 if (!bpf_opcode_in_insntable(insn->code)) { 9544 verbose(env, "unknown opcode %02x\n", insn->code); 9545 return -EINVAL; 9546 } 9547 } 9548 9549 /* now all pseudo BPF_LD_IMM64 instructions load valid 9550 * 'struct bpf_map *' into a register instead of user map_fd. 9551 * These pointers will be used later by verifier to validate map access. 9552 */ 9553 return 0; 9554 } 9555 9556 /* drop refcnt of maps used by the rejected program */ 9557 static void release_maps(struct bpf_verifier_env *env) 9558 { 9559 __bpf_free_used_maps(env->prog->aux, env->used_maps, 9560 env->used_map_cnt); 9561 } 9562 9563 /* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */ 9564 static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env) 9565 { 9566 struct bpf_insn *insn = env->prog->insnsi; 9567 int insn_cnt = env->prog->len; 9568 int i; 9569 9570 for (i = 0; i < insn_cnt; i++, insn++) 9571 if (insn->code == (BPF_LD | BPF_IMM | BPF_DW)) 9572 insn->src_reg = 0; 9573 } 9574 9575 /* single env->prog->insni[off] instruction was replaced with the range 9576 * insni[off, off + cnt). 
Adjust corresponding insn_aux_data by copying 9577 * [0, off) and [off, end) to new locations, so the patched range stays zero 9578 */ 9579 static int adjust_insn_aux_data(struct bpf_verifier_env *env, 9580 struct bpf_prog *new_prog, u32 off, u32 cnt) 9581 { 9582 struct bpf_insn_aux_data *new_data, *old_data = env->insn_aux_data; 9583 struct bpf_insn *insn = new_prog->insnsi; 9584 u32 prog_len; 9585 int i; 9586 9587 /* aux info at OFF always needs adjustment, no matter fast path 9588 * (cnt == 1) is taken or not. There is no guarantee INSN at OFF is the 9589 * original insn at old prog. 9590 */ 9591 old_data[off].zext_dst = insn_has_def32(env, insn + off + cnt - 1); 9592 9593 if (cnt == 1) 9594 return 0; 9595 prog_len = new_prog->len; 9596 new_data = vzalloc(array_size(prog_len, 9597 sizeof(struct bpf_insn_aux_data))); 9598 if (!new_data) 9599 return -ENOMEM; 9600 memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off); 9601 memcpy(new_data + off + cnt - 1, old_data + off, 9602 sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1)); 9603 for (i = off; i < off + cnt - 1; i++) { 9604 new_data[i].seen = env->pass_cnt; 9605 new_data[i].zext_dst = insn_has_def32(env, insn + i); 9606 } 9607 env->insn_aux_data = new_data; 9608 vfree(old_data); 9609 return 0; 9610 } 9611 9612 static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len) 9613 { 9614 int i; 9615 9616 if (len == 1) 9617 return; 9618 /* NOTE: fake 'exit' subprog should be updated as well. */ 9619 for (i = 0; i <= env->subprog_cnt; i++) { 9620 if (env->subprog_info[i].start <= off) 9621 continue; 9622 env->subprog_info[i].start += len - 1; 9623 } 9624 } 9625 9626 static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off, 9627 const struct bpf_insn *patch, u32 len) 9628 { 9629 struct bpf_prog *new_prog; 9630 9631 new_prog = bpf_patch_insn_single(env->prog, off, patch, len); 9632 if (IS_ERR(new_prog)) { 9633 if (PTR_ERR(new_prog) == -ERANGE) 9634 verbose(env, 9635 "insn %d cannot be patched due to 16-bit range\n", 9636 env->insn_aux_data[off].orig_idx); 9637 return NULL; 9638 } 9639 if (adjust_insn_aux_data(env, new_prog, off, len)) 9640 return NULL; 9641 adjust_subprog_starts(env, off, len); 9642 return new_prog; 9643 } 9644 9645 static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env, 9646 u32 off, u32 cnt) 9647 { 9648 int i, j; 9649 9650 /* find first prog starting at or after off (first to remove) */ 9651 for (i = 0; i < env->subprog_cnt; i++) 9652 if (env->subprog_info[i].start >= off) 9653 break; 9654 /* find first prog starting at or after off + cnt (first to stay) */ 9655 for (j = i; j < env->subprog_cnt; j++) 9656 if (env->subprog_info[j].start >= off + cnt) 9657 break; 9658 /* if j doesn't start exactly at off + cnt, we are just removing 9659 * the front of previous prog 9660 */ 9661 if (env->subprog_info[j].start != off + cnt) 9662 j--; 9663 9664 if (j > i) { 9665 struct bpf_prog_aux *aux = env->prog->aux; 9666 int move; 9667 9668 /* move fake 'exit' subprog as well */ 9669 move = env->subprog_cnt + 1 - j; 9670 9671 memmove(env->subprog_info + i, 9672 env->subprog_info + j, 9673 sizeof(*env->subprog_info) * move); 9674 env->subprog_cnt -= j - i; 9675 9676 /* remove func_info */ 9677 if (aux->func_info) { 9678 move = aux->func_info_cnt - j; 9679 9680 memmove(aux->func_info + i, 9681 aux->func_info + j, 9682 sizeof(*aux->func_info) * move); 9683 aux->func_info_cnt -= j - i; 9684 /* func_info->insn_off is set after all code rewrites, 9685 * in 
adjust_btf_func() - no need to adjust 9686 */ 9687 } 9688 } else { 9689 /* convert i from "first prog to remove" to "first to adjust" */ 9690 if (env->subprog_info[i].start == off) 9691 i++; 9692 } 9693 9694 /* update fake 'exit' subprog as well */ 9695 for (; i <= env->subprog_cnt; i++) 9696 env->subprog_info[i].start -= cnt; 9697 9698 return 0; 9699 } 9700 9701 static int bpf_adj_linfo_after_remove(struct bpf_verifier_env *env, u32 off, 9702 u32 cnt) 9703 { 9704 struct bpf_prog *prog = env->prog; 9705 u32 i, l_off, l_cnt, nr_linfo; 9706 struct bpf_line_info *linfo; 9707 9708 nr_linfo = prog->aux->nr_linfo; 9709 if (!nr_linfo) 9710 return 0; 9711 9712 linfo = prog->aux->linfo; 9713 9714 /* find first line info to remove, count lines to be removed */ 9715 for (i = 0; i < nr_linfo; i++) 9716 if (linfo[i].insn_off >= off) 9717 break; 9718 9719 l_off = i; 9720 l_cnt = 0; 9721 for (; i < nr_linfo; i++) 9722 if (linfo[i].insn_off < off + cnt) 9723 l_cnt++; 9724 else 9725 break; 9726 9727 /* First live insn doesn't match first live linfo, it needs to "inherit" 9728 * last removed linfo. prog is already modified, so prog->len == off 9729 * means no live instructions after (tail of the program was removed). 9730 */ 9731 if (prog->len != off && l_cnt && 9732 (i == nr_linfo || linfo[i].insn_off != off + cnt)) { 9733 l_cnt--; 9734 linfo[--i].insn_off = off + cnt; 9735 } 9736 9737 /* remove the line info which refer to the removed instructions */ 9738 if (l_cnt) { 9739 memmove(linfo + l_off, linfo + i, 9740 sizeof(*linfo) * (nr_linfo - i)); 9741 9742 prog->aux->nr_linfo -= l_cnt; 9743 nr_linfo = prog->aux->nr_linfo; 9744 } 9745 9746 /* pull all linfo[i].insn_off >= off + cnt in by cnt */ 9747 for (i = l_off; i < nr_linfo; i++) 9748 linfo[i].insn_off -= cnt; 9749 9750 /* fix up all subprogs (incl. 'exit') which start >= off */ 9751 for (i = 0; i <= env->subprog_cnt; i++) 9752 if (env->subprog_info[i].linfo_idx > l_off) { 9753 /* program may have started in the removed region but 9754 * may not be fully removed 9755 */ 9756 if (env->subprog_info[i].linfo_idx >= l_off + l_cnt) 9757 env->subprog_info[i].linfo_idx -= l_cnt; 9758 else 9759 env->subprog_info[i].linfo_idx = l_off; 9760 } 9761 9762 return 0; 9763 } 9764 9765 static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt) 9766 { 9767 struct bpf_insn_aux_data *aux_data = env->insn_aux_data; 9768 unsigned int orig_prog_len = env->prog->len; 9769 int err; 9770 9771 if (bpf_prog_is_dev_bound(env->prog->aux)) 9772 bpf_prog_offload_remove_insns(env, off, cnt); 9773 9774 err = bpf_remove_insns(env->prog, off, cnt); 9775 if (err) 9776 return err; 9777 9778 err = adjust_subprog_starts_after_remove(env, off, cnt); 9779 if (err) 9780 return err; 9781 9782 err = bpf_adj_linfo_after_remove(env, off, cnt); 9783 if (err) 9784 return err; 9785 9786 memmove(aux_data + off, aux_data + off + cnt, 9787 sizeof(*aux_data) * (orig_prog_len - off - cnt)); 9788 9789 return 0; 9790 } 9791 9792 /* The verifier does more data flow analysis than llvm and will not 9793 * explore branches that are dead at run time. Malicious programs can 9794 * have dead code too. Therefore replace all dead at-run-time code 9795 * with 'ja -1'. 9796 * 9797 * Just nops are not optimal, e.g. if they would sit at the end of the 9798 * program and through another bug we would manage to jump there, then 9799 * we'd execute beyond program memory otherwise. Returning exception 9800 * code also wouldn't work since we can have subprogs where the dead 9801 * code could be located. 
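 *
 * Illustrative before/after: a dead (never 'seen') insn such as
 *   r0 = 42
 * is overwritten below with BPF_JMP_IMM(BPF_JA, 0, 0, -1), a jump
 * whose target is the insn itself, so even a stray jump into the dead
 * region can never run forward into unrelated instructions.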
9802 */ 9803 static void sanitize_dead_code(struct bpf_verifier_env *env) 9804 { 9805 struct bpf_insn_aux_data *aux_data = env->insn_aux_data; 9806 struct bpf_insn trap = BPF_JMP_IMM(BPF_JA, 0, 0, -1); 9807 struct bpf_insn *insn = env->prog->insnsi; 9808 const int insn_cnt = env->prog->len; 9809 int i; 9810 9811 for (i = 0; i < insn_cnt; i++) { 9812 if (aux_data[i].seen) 9813 continue; 9814 memcpy(insn + i, &trap, sizeof(trap)); 9815 } 9816 } 9817 9818 static bool insn_is_cond_jump(u8 code) 9819 { 9820 u8 op; 9821 9822 if (BPF_CLASS(code) == BPF_JMP32) 9823 return true; 9824 9825 if (BPF_CLASS(code) != BPF_JMP) 9826 return false; 9827 9828 op = BPF_OP(code); 9829 return op != BPF_JA && op != BPF_EXIT && op != BPF_CALL; 9830 } 9831 9832 static void opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env) 9833 { 9834 struct bpf_insn_aux_data *aux_data = env->insn_aux_data; 9835 struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0); 9836 struct bpf_insn *insn = env->prog->insnsi; 9837 const int insn_cnt = env->prog->len; 9838 int i; 9839 9840 for (i = 0; i < insn_cnt; i++, insn++) { 9841 if (!insn_is_cond_jump(insn->code)) 9842 continue; 9843 9844 if (!aux_data[i + 1].seen) 9845 ja.off = insn->off; 9846 else if (!aux_data[i + 1 + insn->off].seen) 9847 ja.off = 0; 9848 else 9849 continue; 9850 9851 if (bpf_prog_is_dev_bound(env->prog->aux)) 9852 bpf_prog_offload_replace_insn(env, i, &ja); 9853 9854 memcpy(insn, &ja, sizeof(ja)); 9855 } 9856 } 9857 9858 static int opt_remove_dead_code(struct bpf_verifier_env *env) 9859 { 9860 struct bpf_insn_aux_data *aux_data = env->insn_aux_data; 9861 int insn_cnt = env->prog->len; 9862 int i, err; 9863 9864 for (i = 0; i < insn_cnt; i++) { 9865 int j; 9866 9867 j = 0; 9868 while (i + j < insn_cnt && !aux_data[i + j].seen) 9869 j++; 9870 if (!j) 9871 continue; 9872 9873 err = verifier_remove_insns(env, i, j); 9874 if (err) 9875 return err; 9876 insn_cnt = env->prog->len; 9877 } 9878 9879 return 0; 9880 } 9881 9882 static int opt_remove_nops(struct bpf_verifier_env *env) 9883 { 9884 const struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0); 9885 struct bpf_insn *insn = env->prog->insnsi; 9886 int insn_cnt = env->prog->len; 9887 int i, err; 9888 9889 for (i = 0; i < insn_cnt; i++) { 9890 if (memcmp(&insn[i], &ja, sizeof(ja))) 9891 continue; 9892 9893 err = verifier_remove_insns(env, i, 1); 9894 if (err) 9895 return err; 9896 insn_cnt--; 9897 i--; 9898 } 9899 9900 return 0; 9901 } 9902 9903 static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env, 9904 const union bpf_attr *attr) 9905 { 9906 struct bpf_insn *patch, zext_patch[2], rnd_hi32_patch[4]; 9907 struct bpf_insn_aux_data *aux = env->insn_aux_data; 9908 int i, patch_len, delta = 0, len = env->prog->len; 9909 struct bpf_insn *insns = env->prog->insnsi; 9910 struct bpf_prog *new_prog; 9911 bool rnd_hi32; 9912 9913 rnd_hi32 = attr->prog_flags & BPF_F_TEST_RND_HI32; 9914 zext_patch[1] = BPF_ZEXT_REG(0); 9915 rnd_hi32_patch[1] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, 0); 9916 rnd_hi32_patch[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, 32); 9917 rnd_hi32_patch[3] = BPF_ALU64_REG(BPF_OR, 0, BPF_REG_AX); 9918 for (i = 0; i < len; i++) { 9919 int adj_idx = i + delta; 9920 struct bpf_insn insn; 9921 9922 insn = insns[adj_idx]; 9923 if (!aux[adj_idx].zext_dst) { 9924 u8 code, class; 9925 u32 imm_rnd; 9926 9927 if (!rnd_hi32) 9928 continue; 9929 9930 code = insn.code; 9931 class = BPF_CLASS(code); 9932 if (insn_no_def(&insn)) 9933 continue; 9934 9935 /* NOTE: arg "reg" (the fourth one) is only used for 9936 * 
BPF_STX which has been ruled out in above 9937 * check, it is safe to pass NULL here. 9938 */ 9939 if (is_reg64(env, &insn, insn.dst_reg, NULL, DST_OP)) { 9940 if (class == BPF_LD && 9941 BPF_MODE(code) == BPF_IMM) 9942 i++; 9943 continue; 9944 } 9945 9946 /* ctx load could be transformed into wider load. */ 9947 if (class == BPF_LDX && 9948 aux[adj_idx].ptr_type == PTR_TO_CTX) 9949 continue; 9950 9951 imm_rnd = get_random_int(); 9952 rnd_hi32_patch[0] = insn; 9953 rnd_hi32_patch[1].imm = imm_rnd; 9954 rnd_hi32_patch[3].dst_reg = insn.dst_reg; 9955 patch = rnd_hi32_patch; 9956 patch_len = 4; 9957 goto apply_patch_buffer; 9958 } 9959 9960 if (!bpf_jit_needs_zext()) 9961 continue; 9962 9963 zext_patch[0] = insn; 9964 zext_patch[1].dst_reg = insn.dst_reg; 9965 zext_patch[1].src_reg = insn.dst_reg; 9966 patch = zext_patch; 9967 patch_len = 2; 9968 apply_patch_buffer: 9969 new_prog = bpf_patch_insn_data(env, adj_idx, patch, patch_len); 9970 if (!new_prog) 9971 return -ENOMEM; 9972 env->prog = new_prog; 9973 insns = new_prog->insnsi; 9974 aux = env->insn_aux_data; 9975 delta += patch_len - 1; 9976 } 9977 9978 return 0; 9979 } 9980 9981 /* convert load instructions that access fields of a context type into a 9982 * sequence of instructions that access fields of the underlying structure: 9983 * struct __sk_buff -> struct sk_buff 9984 * struct bpf_sock_ops -> struct sock 9985 */ 9986 static int convert_ctx_accesses(struct bpf_verifier_env *env) 9987 { 9988 const struct bpf_verifier_ops *ops = env->ops; 9989 int i, cnt, size, ctx_field_size, delta = 0; 9990 const int insn_cnt = env->prog->len; 9991 struct bpf_insn insn_buf[16], *insn; 9992 u32 target_size, size_default, off; 9993 struct bpf_prog *new_prog; 9994 enum bpf_access_type type; 9995 bool is_narrower_load; 9996 9997 if (ops->gen_prologue || env->seen_direct_write) { 9998 if (!ops->gen_prologue) { 9999 verbose(env, "bpf verifier is misconfigured\n"); 10000 return -EINVAL; 10001 } 10002 cnt = ops->gen_prologue(insn_buf, env->seen_direct_write, 10003 env->prog); 10004 if (cnt >= ARRAY_SIZE(insn_buf)) { 10005 verbose(env, "bpf verifier is misconfigured\n"); 10006 return -EINVAL; 10007 } else if (cnt) { 10008 new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt); 10009 if (!new_prog) 10010 return -ENOMEM; 10011 10012 env->prog = new_prog; 10013 delta += cnt - 1; 10014 } 10015 } 10016 10017 if (bpf_prog_is_dev_bound(env->prog->aux)) 10018 return 0; 10019 10020 insn = env->prog->insnsi + delta; 10021 10022 for (i = 0; i < insn_cnt; i++, insn++) { 10023 bpf_convert_ctx_access_t convert_ctx_access; 10024 10025 if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) || 10026 insn->code == (BPF_LDX | BPF_MEM | BPF_H) || 10027 insn->code == (BPF_LDX | BPF_MEM | BPF_W) || 10028 insn->code == (BPF_LDX | BPF_MEM | BPF_DW)) 10029 type = BPF_READ; 10030 else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) || 10031 insn->code == (BPF_STX | BPF_MEM | BPF_H) || 10032 insn->code == (BPF_STX | BPF_MEM | BPF_W) || 10033 insn->code == (BPF_STX | BPF_MEM | BPF_DW)) 10034 type = BPF_WRITE; 10035 else 10036 continue; 10037 10038 if (type == BPF_WRITE && 10039 env->insn_aux_data[i + delta].sanitize_stack_off) { 10040 struct bpf_insn patch[] = { 10041 /* Sanitize suspicious stack slot with zero. 
10042 * There are no memory dependencies for this store, 10043 * since it's only using frame pointer and immediate 10044 * constant of zero 10045 */ 10046 BPF_ST_MEM(BPF_DW, BPF_REG_FP, 10047 env->insn_aux_data[i + delta].sanitize_stack_off, 10048 0), 10049 /* the original STX instruction will immediately 10050 * overwrite the same stack slot with appropriate value 10051 */ 10052 *insn, 10053 }; 10054 10055 cnt = ARRAY_SIZE(patch); 10056 new_prog = bpf_patch_insn_data(env, i + delta, patch, cnt); 10057 if (!new_prog) 10058 return -ENOMEM; 10059 10060 delta += cnt - 1; 10061 env->prog = new_prog; 10062 insn = new_prog->insnsi + i + delta; 10063 continue; 10064 } 10065 10066 switch (env->insn_aux_data[i + delta].ptr_type) { 10067 case PTR_TO_CTX: 10068 if (!ops->convert_ctx_access) 10069 continue; 10070 convert_ctx_access = ops->convert_ctx_access; 10071 break; 10072 case PTR_TO_SOCKET: 10073 case PTR_TO_SOCK_COMMON: 10074 convert_ctx_access = bpf_sock_convert_ctx_access; 10075 break; 10076 case PTR_TO_TCP_SOCK: 10077 convert_ctx_access = bpf_tcp_sock_convert_ctx_access; 10078 break; 10079 case PTR_TO_XDP_SOCK: 10080 convert_ctx_access = bpf_xdp_sock_convert_ctx_access; 10081 break; 10082 case PTR_TO_BTF_ID: 10083 if (type == BPF_READ) { 10084 insn->code = BPF_LDX | BPF_PROBE_MEM | 10085 BPF_SIZE((insn)->code); 10086 env->prog->aux->num_exentries++; 10087 } else if (resolve_prog_type(env->prog) != BPF_PROG_TYPE_STRUCT_OPS) { 10088 verbose(env, "Writes through BTF pointers are not allowed\n"); 10089 return -EINVAL; 10090 } 10091 continue; 10092 default: 10093 continue; 10094 } 10095 10096 ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size; 10097 size = BPF_LDST_BYTES(insn); 10098 10099 /* If the read access is a narrower load of the field, 10100 * convert to a 4/8-byte load, to minimum program type specific 10101 * convert_ctx_access changes. If conversion is successful, 10102 * we will apply proper mask to the result. 
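 *
 * Worked example (hypothetical offsets, little endian): a 1-byte read
 * at offset 2 of a 4-byte ctx field becomes a 4-byte read at the
 * aligned offset 0, and the wanted byte is recovered by the shift and
 * mask emitted further down:
 *   w_dst >>= 16       (byte offset 2 times 8 bits)
 *   w_dst &= 0xff      ((1 << 1 * 8) - 1)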
10103 */ 10104 is_narrower_load = size < ctx_field_size; 10105 size_default = bpf_ctx_off_adjust_machine(ctx_field_size); 10106 off = insn->off; 10107 if (is_narrower_load) { 10108 u8 size_code; 10109 10110 if (type == BPF_WRITE) { 10111 verbose(env, "bpf verifier narrow ctx access misconfigured\n"); 10112 return -EINVAL; 10113 } 10114 10115 size_code = BPF_H; 10116 if (ctx_field_size == 4) 10117 size_code = BPF_W; 10118 else if (ctx_field_size == 8) 10119 size_code = BPF_DW; 10120 10121 insn->off = off & ~(size_default - 1); 10122 insn->code = BPF_LDX | BPF_MEM | size_code; 10123 } 10124 10125 target_size = 0; 10126 cnt = convert_ctx_access(type, insn, insn_buf, env->prog, 10127 &target_size); 10128 if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf) || 10129 (ctx_field_size && !target_size)) { 10130 verbose(env, "bpf verifier is misconfigured\n"); 10131 return -EINVAL; 10132 } 10133 10134 if (is_narrower_load && size < target_size) { 10135 u8 shift = bpf_ctx_narrow_access_offset( 10136 off, size, size_default) * 8; 10137 if (ctx_field_size <= 4) { 10138 if (shift) 10139 insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH, 10140 insn->dst_reg, 10141 shift); 10142 insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg, 10143 (1 << size * 8) - 1); 10144 } else { 10145 if (shift) 10146 insn_buf[cnt++] = BPF_ALU64_IMM(BPF_RSH, 10147 insn->dst_reg, 10148 shift); 10149 insn_buf[cnt++] = BPF_ALU64_IMM(BPF_AND, insn->dst_reg, 10150 (1ULL << size * 8) - 1); 10151 } 10152 } 10153 10154 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); 10155 if (!new_prog) 10156 return -ENOMEM; 10157 10158 delta += cnt - 1; 10159 10160 /* keep walking new program and skip insns we just inserted */ 10161 env->prog = new_prog; 10162 insn = new_prog->insnsi + i + delta; 10163 } 10164 10165 return 0; 10166 } 10167 10168 static int jit_subprogs(struct bpf_verifier_env *env) 10169 { 10170 struct bpf_prog *prog = env->prog, **func, *tmp; 10171 int i, j, subprog_start, subprog_end = 0, len, subprog; 10172 struct bpf_insn *insn; 10173 void *old_bpf_func; 10174 int err, num_exentries; 10175 10176 if (env->subprog_cnt <= 1) 10177 return 0; 10178 10179 for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) { 10180 if (insn->code != (BPF_JMP | BPF_CALL) || 10181 insn->src_reg != BPF_PSEUDO_CALL) 10182 continue; 10183 /* Upon error here we cannot fall back to interpreter but 10184 * need a hard reject of the program. Thus -EFAULT is 10185 * propagated in any case. 10186 */ 10187 subprog = find_subprog(env, i + insn->imm + 1); 10188 if (subprog < 0) { 10189 WARN_ONCE(1, "verifier bug. 
No program starts at insn %d\n", 10190 i + insn->imm + 1); 10191 return -EFAULT; 10192 } 10193 /* temporarily remember subprog id inside insn instead of 10194 * aux_data, since next loop will split up all insns into funcs 10195 */ 10196 insn->off = subprog; 10197 /* remember original imm in case JIT fails and fallback 10198 * to interpreter will be needed 10199 */ 10200 env->insn_aux_data[i].call_imm = insn->imm; 10201 /* point imm to __bpf_call_base+1 from JITs point of view */ 10202 insn->imm = 1; 10203 } 10204 10205 err = bpf_prog_alloc_jited_linfo(prog); 10206 if (err) 10207 goto out_undo_insn; 10208 10209 err = -ENOMEM; 10210 func = kcalloc(env->subprog_cnt, sizeof(prog), GFP_KERNEL); 10211 if (!func) 10212 goto out_undo_insn; 10213 10214 for (i = 0; i < env->subprog_cnt; i++) { 10215 subprog_start = subprog_end; 10216 subprog_end = env->subprog_info[i + 1].start; 10217 10218 len = subprog_end - subprog_start; 10219 /* BPF_PROG_RUN doesn't call subprogs directly, 10220 * hence main prog stats include the runtime of subprogs. 10221 * subprogs don't have IDs and not reachable via prog_get_next_id 10222 * func[i]->aux->stats will never be accessed and stays NULL 10223 */ 10224 func[i] = bpf_prog_alloc_no_stats(bpf_prog_size(len), GFP_USER); 10225 if (!func[i]) 10226 goto out_free; 10227 memcpy(func[i]->insnsi, &prog->insnsi[subprog_start], 10228 len * sizeof(struct bpf_insn)); 10229 func[i]->type = prog->type; 10230 func[i]->len = len; 10231 if (bpf_prog_calc_tag(func[i])) 10232 goto out_free; 10233 func[i]->is_func = 1; 10234 func[i]->aux->func_idx = i; 10235 /* the btf and func_info will be freed only at prog->aux */ 10236 func[i]->aux->btf = prog->aux->btf; 10237 func[i]->aux->func_info = prog->aux->func_info; 10238 10239 /* Use bpf_prog_F_tag to indicate functions in stack traces. 
10240 * Long term would need debug info to populate names 10241 */ 10242 func[i]->aux->name[0] = 'F'; 10243 func[i]->aux->stack_depth = env->subprog_info[i].stack_depth; 10244 func[i]->jit_requested = 1; 10245 func[i]->aux->linfo = prog->aux->linfo; 10246 func[i]->aux->nr_linfo = prog->aux->nr_linfo; 10247 func[i]->aux->jited_linfo = prog->aux->jited_linfo; 10248 func[i]->aux->linfo_idx = env->subprog_info[i].linfo_idx; 10249 num_exentries = 0; 10250 insn = func[i]->insnsi; 10251 for (j = 0; j < func[i]->len; j++, insn++) { 10252 if (BPF_CLASS(insn->code) == BPF_LDX && 10253 BPF_MODE(insn->code) == BPF_PROBE_MEM) 10254 num_exentries++; 10255 } 10256 func[i]->aux->num_exentries = num_exentries; 10257 func[i] = bpf_int_jit_compile(func[i]); 10258 if (!func[i]->jited) { 10259 err = -ENOTSUPP; 10260 goto out_free; 10261 } 10262 cond_resched(); 10263 } 10264 /* at this point all bpf functions were successfully JITed 10265 * now populate all bpf_calls with correct addresses and 10266 * run last pass of JIT 10267 */ 10268 for (i = 0; i < env->subprog_cnt; i++) { 10269 insn = func[i]->insnsi; 10270 for (j = 0; j < func[i]->len; j++, insn++) { 10271 if (insn->code != (BPF_JMP | BPF_CALL) || 10272 insn->src_reg != BPF_PSEUDO_CALL) 10273 continue; 10274 subprog = insn->off; 10275 insn->imm = BPF_CAST_CALL(func[subprog]->bpf_func) - 10276 __bpf_call_base; 10277 } 10278 10279 /* we use the aux data to keep a list of the start addresses 10280 * of the JITed images for each function in the program 10281 * 10282 * for some architectures, such as powerpc64, the imm field 10283 * might not be large enough to hold the offset of the start 10284 * address of the callee's JITed image from __bpf_call_base 10285 * 10286 * in such cases, we can lookup the start address of a callee 10287 * by using its subprog id, available from the off field of 10288 * the call instruction, as an index for this list 10289 */ 10290 func[i]->aux->func = func; 10291 func[i]->aux->func_cnt = env->subprog_cnt; 10292 } 10293 for (i = 0; i < env->subprog_cnt; i++) { 10294 old_bpf_func = func[i]->bpf_func; 10295 tmp = bpf_int_jit_compile(func[i]); 10296 if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) { 10297 verbose(env, "JIT doesn't support bpf-to-bpf calls\n"); 10298 err = -ENOTSUPP; 10299 goto out_free; 10300 } 10301 cond_resched(); 10302 } 10303 10304 /* finally lock prog and jit images for all functions and 10305 * populate kallsysm 10306 */ 10307 for (i = 0; i < env->subprog_cnt; i++) { 10308 bpf_prog_lock_ro(func[i]); 10309 bpf_prog_kallsyms_add(func[i]); 10310 } 10311 10312 /* Last step: make now unused interpreter insns from main 10313 * prog consistent for later dump requests, so they can 10314 * later look the same as if they were interpreted only. 
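 *
 * Illustrative note: the loop below reverses the temporary encoding
 * used during JITing: insn->off gets back the original call immediate
 * saved in insn_aux_data[].call_imm and insn->imm is rewritten to the
 * callee's subprog index.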
10315 */ 10316 for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) { 10317 if (insn->code != (BPF_JMP | BPF_CALL) || 10318 insn->src_reg != BPF_PSEUDO_CALL) 10319 continue; 10320 insn->off = env->insn_aux_data[i].call_imm; 10321 subprog = find_subprog(env, i + insn->off + 1); 10322 insn->imm = subprog; 10323 } 10324 10325 prog->jited = 1; 10326 prog->bpf_func = func[0]->bpf_func; 10327 prog->aux->func = func; 10328 prog->aux->func_cnt = env->subprog_cnt; 10329 bpf_prog_free_unused_jited_linfo(prog); 10330 return 0; 10331 out_free: 10332 for (i = 0; i < env->subprog_cnt; i++) 10333 if (func[i]) 10334 bpf_jit_free(func[i]); 10335 kfree(func); 10336 out_undo_insn: 10337 /* cleanup main prog to be interpreted */ 10338 prog->jit_requested = 0; 10339 for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) { 10340 if (insn->code != (BPF_JMP | BPF_CALL) || 10341 insn->src_reg != BPF_PSEUDO_CALL) 10342 continue; 10343 insn->off = 0; 10344 insn->imm = env->insn_aux_data[i].call_imm; 10345 } 10346 bpf_prog_free_jited_linfo(prog); 10347 return err; 10348 } 10349 10350 static int fixup_call_args(struct bpf_verifier_env *env) 10351 { 10352 #ifndef CONFIG_BPF_JIT_ALWAYS_ON 10353 struct bpf_prog *prog = env->prog; 10354 struct bpf_insn *insn = prog->insnsi; 10355 int i, depth; 10356 #endif 10357 int err = 0; 10358 10359 if (env->prog->jit_requested && 10360 !bpf_prog_is_dev_bound(env->prog->aux)) { 10361 err = jit_subprogs(env); 10362 if (err == 0) 10363 return 0; 10364 if (err == -EFAULT) 10365 return err; 10366 } 10367 #ifndef CONFIG_BPF_JIT_ALWAYS_ON 10368 for (i = 0; i < prog->len; i++, insn++) { 10369 if (insn->code != (BPF_JMP | BPF_CALL) || 10370 insn->src_reg != BPF_PSEUDO_CALL) 10371 continue; 10372 depth = get_callee_stack_depth(env, insn, i); 10373 if (depth < 0) 10374 return depth; 10375 bpf_patch_call_args(insn, depth); 10376 } 10377 err = 0; 10378 #endif 10379 return err; 10380 } 10381 10382 /* fixup insn->imm field of bpf_call instructions 10383 * and inline eligible helpers as explicit sequence of BPF instructions 10384 * 10385 * this function is called after eBPF program passed verification 10386 */ 10387 static int fixup_bpf_calls(struct bpf_verifier_env *env) 10388 { 10389 struct bpf_prog *prog = env->prog; 10390 bool expect_blinding = bpf_jit_blinding_enabled(prog); 10391 struct bpf_insn *insn = prog->insnsi; 10392 const struct bpf_func_proto *fn; 10393 const int insn_cnt = prog->len; 10394 const struct bpf_map_ops *ops; 10395 struct bpf_insn_aux_data *aux; 10396 struct bpf_insn insn_buf[16]; 10397 struct bpf_prog *new_prog; 10398 struct bpf_map *map_ptr; 10399 int i, ret, cnt, delta = 0; 10400 10401 for (i = 0; i < insn_cnt; i++, insn++) { 10402 if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) || 10403 insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) || 10404 insn->code == (BPF_ALU | BPF_MOD | BPF_X) || 10405 insn->code == (BPF_ALU | BPF_DIV | BPF_X)) { 10406 bool is64 = BPF_CLASS(insn->code) == BPF_ALU64; 10407 struct bpf_insn mask_and_div[] = { 10408 BPF_MOV32_REG(insn->src_reg, insn->src_reg), 10409 /* Rx div 0 -> 0 */ 10410 BPF_JMP_IMM(BPF_JNE, insn->src_reg, 0, 2), 10411 BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg), 10412 BPF_JMP_IMM(BPF_JA, 0, 0, 1), 10413 *insn, 10414 }; 10415 struct bpf_insn mask_and_mod[] = { 10416 BPF_MOV32_REG(insn->src_reg, insn->src_reg), 10417 /* Rx mod 0 -> Rx */ 10418 BPF_JMP_IMM(BPF_JEQ, insn->src_reg, 0, 1), 10419 *insn, 10420 }; 10421 struct bpf_insn *patchlet; 10422 10423 if (insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) || 10424 
insn->code == (BPF_ALU | BPF_DIV | BPF_X)) { 10425 patchlet = mask_and_div + (is64 ? 1 : 0); 10426 cnt = ARRAY_SIZE(mask_and_div) - (is64 ? 1 : 0); 10427 } else { 10428 patchlet = mask_and_mod + (is64 ? 1 : 0); 10429 cnt = ARRAY_SIZE(mask_and_mod) - (is64 ? 1 : 0); 10430 } 10431 10432 new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt); 10433 if (!new_prog) 10434 return -ENOMEM; 10435 10436 delta += cnt - 1; 10437 env->prog = prog = new_prog; 10438 insn = new_prog->insnsi + i + delta; 10439 continue; 10440 } 10441 10442 if (BPF_CLASS(insn->code) == BPF_LD && 10443 (BPF_MODE(insn->code) == BPF_ABS || 10444 BPF_MODE(insn->code) == BPF_IND)) { 10445 cnt = env->ops->gen_ld_abs(insn, insn_buf); 10446 if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) { 10447 verbose(env, "bpf verifier is misconfigured\n"); 10448 return -EINVAL; 10449 } 10450 10451 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); 10452 if (!new_prog) 10453 return -ENOMEM; 10454 10455 delta += cnt - 1; 10456 env->prog = prog = new_prog; 10457 insn = new_prog->insnsi + i + delta; 10458 continue; 10459 } 10460 10461 if (insn->code == (BPF_ALU64 | BPF_ADD | BPF_X) || 10462 insn->code == (BPF_ALU64 | BPF_SUB | BPF_X)) { 10463 const u8 code_add = BPF_ALU64 | BPF_ADD | BPF_X; 10464 const u8 code_sub = BPF_ALU64 | BPF_SUB | BPF_X; 10465 struct bpf_insn insn_buf[16]; 10466 struct bpf_insn *patch = &insn_buf[0]; 10467 bool issrc, isneg; 10468 u32 off_reg; 10469 10470 aux = &env->insn_aux_data[i + delta]; 10471 if (!aux->alu_state || 10472 aux->alu_state == BPF_ALU_NON_POINTER) 10473 continue; 10474 10475 isneg = aux->alu_state & BPF_ALU_NEG_VALUE; 10476 issrc = (aux->alu_state & BPF_ALU_SANITIZE) == 10477 BPF_ALU_SANITIZE_SRC; 10478 10479 off_reg = issrc ? insn->src_reg : insn->dst_reg; 10480 if (isneg) 10481 *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1); 10482 *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit - 1); 10483 *patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg); 10484 *patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg); 10485 *patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0); 10486 *patch++ = BPF_ALU64_IMM(BPF_ARSH, BPF_REG_AX, 63); 10487 if (issrc) { 10488 *patch++ = BPF_ALU64_REG(BPF_AND, BPF_REG_AX, 10489 off_reg); 10490 insn->src_reg = BPF_REG_AX; 10491 } else { 10492 *patch++ = BPF_ALU64_REG(BPF_AND, off_reg, 10493 BPF_REG_AX); 10494 } 10495 if (isneg) 10496 insn->code = insn->code == code_add ? 10497 code_sub : code_add; 10498 *patch++ = *insn; 10499 if (issrc && isneg) 10500 *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1); 10501 cnt = patch - insn_buf; 10502 10503 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); 10504 if (!new_prog) 10505 return -ENOMEM; 10506 10507 delta += cnt - 1; 10508 env->prog = prog = new_prog; 10509 insn = new_prog->insnsi + i + delta; 10510 continue; 10511 } 10512 10513 if (insn->code != (BPF_JMP | BPF_CALL)) 10514 continue; 10515 if (insn->src_reg == BPF_PSEUDO_CALL) 10516 continue; 10517 10518 if (insn->imm == BPF_FUNC_get_route_realm) 10519 prog->dst_needed = 1; 10520 if (insn->imm == BPF_FUNC_get_prandom_u32) 10521 bpf_user_rnd_init_once(); 10522 if (insn->imm == BPF_FUNC_override_return) 10523 prog->kprobe_override = 1; 10524 if (insn->imm == BPF_FUNC_tail_call) { 10525 /* If we tail call into other programs, we 10526 * cannot make any assumptions since they can 10527 * be replaced dynamically during runtime in 10528 * the program array. 
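 *
 * Illustrative consequence: because the callee is unknown, the code
 * below assumes the worst case for anything it could do, e.g.
 *   stack_depth    = MAX_BPF_STACK  (callee may use the whole stack)
 *   max_pkt_offset = MAX_PACKET_OFF (callee may touch the whole packet)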
10529 */ 10530 prog->cb_access = 1; 10531 env->prog->aux->stack_depth = MAX_BPF_STACK; 10532 env->prog->aux->max_pkt_offset = MAX_PACKET_OFF; 10533 10534 /* mark bpf_tail_call as different opcode to avoid 10535 * conditional branch in the interpeter for every normal 10536 * call and to prevent accidental JITing by JIT compiler 10537 * that doesn't support bpf_tail_call yet 10538 */ 10539 insn->imm = 0; 10540 insn->code = BPF_JMP | BPF_TAIL_CALL; 10541 10542 aux = &env->insn_aux_data[i + delta]; 10543 if (env->bpf_capable && !expect_blinding && 10544 prog->jit_requested && 10545 !bpf_map_key_poisoned(aux) && 10546 !bpf_map_ptr_poisoned(aux) && 10547 !bpf_map_ptr_unpriv(aux)) { 10548 struct bpf_jit_poke_descriptor desc = { 10549 .reason = BPF_POKE_REASON_TAIL_CALL, 10550 .tail_call.map = BPF_MAP_PTR(aux->map_ptr_state), 10551 .tail_call.key = bpf_map_key_immediate(aux), 10552 }; 10553 10554 ret = bpf_jit_add_poke_descriptor(prog, &desc); 10555 if (ret < 0) { 10556 verbose(env, "adding tail call poke descriptor failed\n"); 10557 return ret; 10558 } 10559 10560 insn->imm = ret + 1; 10561 continue; 10562 } 10563 10564 if (!bpf_map_ptr_unpriv(aux)) 10565 continue; 10566 10567 /* instead of changing every JIT dealing with tail_call 10568 * emit two extra insns: 10569 * if (index >= max_entries) goto out; 10570 * index &= array->index_mask; 10571 * to avoid out-of-bounds cpu speculation 10572 */ 10573 if (bpf_map_ptr_poisoned(aux)) { 10574 verbose(env, "tail_call abusing map_ptr\n"); 10575 return -EINVAL; 10576 } 10577 10578 map_ptr = BPF_MAP_PTR(aux->map_ptr_state); 10579 insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3, 10580 map_ptr->max_entries, 2); 10581 insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3, 10582 container_of(map_ptr, 10583 struct bpf_array, 10584 map)->index_mask); 10585 insn_buf[2] = *insn; 10586 cnt = 3; 10587 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); 10588 if (!new_prog) 10589 return -ENOMEM; 10590 10591 delta += cnt - 1; 10592 env->prog = prog = new_prog; 10593 insn = new_prog->insnsi + i + delta; 10594 continue; 10595 } 10596 10597 /* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup 10598 * and other inlining handlers are currently limited to 64 bit 10599 * only. 
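 *
 * Illustrative sketch (array maps, rough pseudo code): map_gen_lookup()
 * can expand the helper call into an inline bounds check plus pointer
 * arithmetic roughly like
 *   index = *(u32 *)key;
 *   if (index >= array->map.max_entries)
 *           r0 = NULL;
 *   else
 *           r0 = array->value + index * round_up(value_size, 8);
 * so no out-of-line call is left on 64-bit JITs.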
10600 */ 10601 if (prog->jit_requested && BITS_PER_LONG == 64 && 10602 (insn->imm == BPF_FUNC_map_lookup_elem || 10603 insn->imm == BPF_FUNC_map_update_elem || 10604 insn->imm == BPF_FUNC_map_delete_elem || 10605 insn->imm == BPF_FUNC_map_push_elem || 10606 insn->imm == BPF_FUNC_map_pop_elem || 10607 insn->imm == BPF_FUNC_map_peek_elem)) { 10608 aux = &env->insn_aux_data[i + delta]; 10609 if (bpf_map_ptr_poisoned(aux)) 10610 goto patch_call_imm; 10611 10612 map_ptr = BPF_MAP_PTR(aux->map_ptr_state); 10613 ops = map_ptr->ops; 10614 if (insn->imm == BPF_FUNC_map_lookup_elem && 10615 ops->map_gen_lookup) { 10616 cnt = ops->map_gen_lookup(map_ptr, insn_buf); 10617 if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) { 10618 verbose(env, "bpf verifier is misconfigured\n"); 10619 return -EINVAL; 10620 } 10621 10622 new_prog = bpf_patch_insn_data(env, i + delta, 10623 insn_buf, cnt); 10624 if (!new_prog) 10625 return -ENOMEM; 10626 10627 delta += cnt - 1; 10628 env->prog = prog = new_prog; 10629 insn = new_prog->insnsi + i + delta; 10630 continue; 10631 } 10632 10633 BUILD_BUG_ON(!__same_type(ops->map_lookup_elem, 10634 (void *(*)(struct bpf_map *map, void *key))NULL)); 10635 BUILD_BUG_ON(!__same_type(ops->map_delete_elem, 10636 (int (*)(struct bpf_map *map, void *key))NULL)); 10637 BUILD_BUG_ON(!__same_type(ops->map_update_elem, 10638 (int (*)(struct bpf_map *map, void *key, void *value, 10639 u64 flags))NULL)); 10640 BUILD_BUG_ON(!__same_type(ops->map_push_elem, 10641 (int (*)(struct bpf_map *map, void *value, 10642 u64 flags))NULL)); 10643 BUILD_BUG_ON(!__same_type(ops->map_pop_elem, 10644 (int (*)(struct bpf_map *map, void *value))NULL)); 10645 BUILD_BUG_ON(!__same_type(ops->map_peek_elem, 10646 (int (*)(struct bpf_map *map, void *value))NULL)); 10647 10648 switch (insn->imm) { 10649 case BPF_FUNC_map_lookup_elem: 10650 insn->imm = BPF_CAST_CALL(ops->map_lookup_elem) - 10651 __bpf_call_base; 10652 continue; 10653 case BPF_FUNC_map_update_elem: 10654 insn->imm = BPF_CAST_CALL(ops->map_update_elem) - 10655 __bpf_call_base; 10656 continue; 10657 case BPF_FUNC_map_delete_elem: 10658 insn->imm = BPF_CAST_CALL(ops->map_delete_elem) - 10659 __bpf_call_base; 10660 continue; 10661 case BPF_FUNC_map_push_elem: 10662 insn->imm = BPF_CAST_CALL(ops->map_push_elem) - 10663 __bpf_call_base; 10664 continue; 10665 case BPF_FUNC_map_pop_elem: 10666 insn->imm = BPF_CAST_CALL(ops->map_pop_elem) - 10667 __bpf_call_base; 10668 continue; 10669 case BPF_FUNC_map_peek_elem: 10670 insn->imm = BPF_CAST_CALL(ops->map_peek_elem) - 10671 __bpf_call_base; 10672 continue; 10673 } 10674 10675 goto patch_call_imm; 10676 } 10677 10678 if (prog->jit_requested && BITS_PER_LONG == 64 && 10679 insn->imm == BPF_FUNC_jiffies64) { 10680 struct bpf_insn ld_jiffies_addr[2] = { 10681 BPF_LD_IMM64(BPF_REG_0, 10682 (unsigned long)&jiffies), 10683 }; 10684 10685 insn_buf[0] = ld_jiffies_addr[0]; 10686 insn_buf[1] = ld_jiffies_addr[1]; 10687 insn_buf[2] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, 10688 BPF_REG_0, 0); 10689 cnt = 3; 10690 10691 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, 10692 cnt); 10693 if (!new_prog) 10694 return -ENOMEM; 10695 10696 delta += cnt - 1; 10697 env->prog = prog = new_prog; 10698 insn = new_prog->insnsi + i + delta; 10699 continue; 10700 } 10701 10702 patch_call_imm: 10703 fn = env->ops->get_func_proto(insn->imm, env->prog); 10704 /* all functions that have prototype and verifier allowed 10705 * programs to call them, must be real in-kernel functions 10706 */ 10707 if (!fn->func) { 10708 verbose(env, 10709 "kernel 
subsystem misconfigured func %s#%d\n", 10710 func_id_name(insn->imm), insn->imm); 10711 return -EFAULT; 10712 } 10713 insn->imm = fn->func - __bpf_call_base; 10714 } 10715 10716 /* Since poke tab is now finalized, publish aux to tracker. */ 10717 for (i = 0; i < prog->aux->size_poke_tab; i++) { 10718 map_ptr = prog->aux->poke_tab[i].tail_call.map; 10719 if (!map_ptr->ops->map_poke_track || 10720 !map_ptr->ops->map_poke_untrack || 10721 !map_ptr->ops->map_poke_run) { 10722 verbose(env, "bpf verifier is misconfigured\n"); 10723 return -EINVAL; 10724 } 10725 10726 ret = map_ptr->ops->map_poke_track(map_ptr, prog->aux); 10727 if (ret < 0) { 10728 verbose(env, "tracking tail call prog failed\n"); 10729 return ret; 10730 } 10731 } 10732 10733 return 0; 10734 } 10735 10736 static void free_states(struct bpf_verifier_env *env) 10737 { 10738 struct bpf_verifier_state_list *sl, *sln; 10739 int i; 10740 10741 sl = env->free_list; 10742 while (sl) { 10743 sln = sl->next; 10744 free_verifier_state(&sl->state, false); 10745 kfree(sl); 10746 sl = sln; 10747 } 10748 env->free_list = NULL; 10749 10750 if (!env->explored_states) 10751 return; 10752 10753 for (i = 0; i < state_htab_size(env); i++) { 10754 sl = env->explored_states[i]; 10755 10756 while (sl) { 10757 sln = sl->next; 10758 free_verifier_state(&sl->state, false); 10759 kfree(sl); 10760 sl = sln; 10761 } 10762 env->explored_states[i] = NULL; 10763 } 10764 } 10765 10766 /* The verifier is using insn_aux_data[] to store temporary data during 10767 * verification and to store information for passes that run after the 10768 * verification like dead code sanitization. do_check_common() for subprogram N 10769 * may analyze many other subprograms. sanitize_insn_aux_data() clears all 10770 * temporary data after do_check_common() finds that subprogram N cannot be 10771 * verified independently. pass_cnt counts the number of times 10772 * do_check_common() was run and insn->aux->seen tells the pass number 10773 * insn_aux_data was touched. These variables are compared to clear temporary 10774 * data from failed pass. For testing and experiments do_check_common() can be 10775 * run multiple times even when prior attempt to verify is unsuccessful. 
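 *
 * Illustrative example (hypothetical pass numbers): if pass_cnt == 3
 * while a global subprog is being checked and that attempt fails, only
 * the LDX/STX aux entries touched in this pass (seen == 3) are wiped
 * below; data recorded by earlier successful passes (seen < 3) for
 * already-verified subprogs is kept.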
10776 */ 10777 static void sanitize_insn_aux_data(struct bpf_verifier_env *env) 10778 { 10779 struct bpf_insn *insn = env->prog->insnsi; 10780 struct bpf_insn_aux_data *aux; 10781 int i, class; 10782 10783 for (i = 0; i < env->prog->len; i++) { 10784 class = BPF_CLASS(insn[i].code); 10785 if (class != BPF_LDX && class != BPF_STX) 10786 continue; 10787 aux = &env->insn_aux_data[i]; 10788 if (aux->seen != env->pass_cnt) 10789 continue; 10790 memset(aux, 0, offsetof(typeof(*aux), orig_idx)); 10791 } 10792 } 10793 10794 static int do_check_common(struct bpf_verifier_env *env, int subprog) 10795 { 10796 bool pop_log = !(env->log.level & BPF_LOG_LEVEL2); 10797 struct bpf_verifier_state *state; 10798 struct bpf_reg_state *regs; 10799 int ret, i; 10800 10801 env->prev_linfo = NULL; 10802 env->pass_cnt++; 10803 10804 state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL); 10805 if (!state) 10806 return -ENOMEM; 10807 state->curframe = 0; 10808 state->speculative = false; 10809 state->branches = 1; 10810 state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL); 10811 if (!state->frame[0]) { 10812 kfree(state); 10813 return -ENOMEM; 10814 } 10815 env->cur_state = state; 10816 init_func_state(env, state->frame[0], 10817 BPF_MAIN_FUNC /* callsite */, 10818 0 /* frameno */, 10819 subprog); 10820 10821 regs = state->frame[state->curframe]->regs; 10822 if (subprog || env->prog->type == BPF_PROG_TYPE_EXT) { 10823 ret = btf_prepare_func_args(env, subprog, regs); 10824 if (ret) 10825 goto out; 10826 for (i = BPF_REG_1; i <= BPF_REG_5; i++) { 10827 if (regs[i].type == PTR_TO_CTX) 10828 mark_reg_known_zero(env, regs, i); 10829 else if (regs[i].type == SCALAR_VALUE) 10830 mark_reg_unknown(env, regs, i); 10831 } 10832 } else { 10833 /* 1st arg to a function */ 10834 regs[BPF_REG_1].type = PTR_TO_CTX; 10835 mark_reg_known_zero(env, regs, BPF_REG_1); 10836 ret = btf_check_func_arg_match(env, subprog, regs); 10837 if (ret == -EFAULT) 10838 /* unlikely verifier bug. abort. 10839 * ret == 0 and ret < 0 are sadly acceptable for 10840 * the main() function due to backward compatibility. 10841 * For example, a socket filter program may be written as: 10842 * int bpf_prog(struct pt_regs *ctx) 10843 * and never dereference that ctx in the program. 10844 * 'struct pt_regs' is a type mismatch for a socket 10845 * filter that should be using 'struct __sk_buff'. 10846 */ 10847 goto out; 10848 } 10849 10850 ret = do_check(env); 10851 out: 10852 /* The check for NULL is necessary, since cur_state can be freed inside 10853 * do_check() under memory pressure. 10854 */ 10855 if (env->cur_state) { 10856 free_verifier_state(env->cur_state, true); 10857 env->cur_state = NULL; 10858 } 10859 while (!pop_stack(env, NULL, NULL, false)); 10860 if (!ret && pop_log) 10861 bpf_vlog_reset(&env->log, 0); 10862 free_states(env); 10863 if (ret) 10864 /* clean aux data in case the subprog was rejected */ 10865 sanitize_insn_aux_data(env); 10866 return ret; 10867 } 10868 10869 /* Verify all global functions in a BPF program one by one based on their BTF. 10870 * All global functions must pass verification. Otherwise the whole program is rejected. 10871 * Consider: 10872 * int bar(int); 10873 * int foo(int f) 10874 * { 10875 * return bar(f); 10876 * } 10877 * int bar(int b) 10878 * { 10879 * ... 10880 * } 10881 * foo() will be verified first for R1=any_scalar_value. During verification it 10882 * will be assumed that bar() has already been verified successfully and the call to bar() 10883 * from foo() will be checked for a type match only. 
Later bar() will be verified 10884 * independently to check that it's safe for R1=any_scalar_value. 10885 */ 10886 static int do_check_subprogs(struct bpf_verifier_env *env) 10887 { 10888 struct bpf_prog_aux *aux = env->prog->aux; 10889 int i, ret; 10890 10891 if (!aux->func_info) 10892 return 0; 10893 10894 for (i = 1; i < env->subprog_cnt; i++) { 10895 if (aux->func_info_aux[i].linkage != BTF_FUNC_GLOBAL) 10896 continue; 10897 env->insn_idx = env->subprog_info[i].start; 10898 WARN_ON_ONCE(env->insn_idx == 0); 10899 ret = do_check_common(env, i); 10900 if (ret) { 10901 return ret; 10902 } else if (env->log.level & BPF_LOG_LEVEL) { 10903 verbose(env, 10904 "Func#%d is safe for any args that match its prototype\n", 10905 i); 10906 } 10907 } 10908 return 0; 10909 } 10910 10911 static int do_check_main(struct bpf_verifier_env *env) 10912 { 10913 int ret; 10914 10915 env->insn_idx = 0; 10916 ret = do_check_common(env, 0); 10917 if (!ret) 10918 env->prog->aux->stack_depth = env->subprog_info[0].stack_depth; 10919 return ret; 10920 } 10921 10922 10923 static void print_verification_stats(struct bpf_verifier_env *env) 10924 { 10925 int i; 10926 10927 if (env->log.level & BPF_LOG_STATS) { 10928 verbose(env, "verification time %lld usec\n", 10929 div_u64(env->verification_time, 1000)); 10930 verbose(env, "stack depth "); 10931 for (i = 0; i < env->subprog_cnt; i++) { 10932 u32 depth = env->subprog_info[i].stack_depth; 10933 10934 verbose(env, "%d", depth); 10935 if (i + 1 < env->subprog_cnt) 10936 verbose(env, "+"); 10937 } 10938 verbose(env, "\n"); 10939 } 10940 verbose(env, "processed %d insns (limit %d) max_states_per_insn %d " 10941 "total_states %d peak_states %d mark_read %d\n", 10942 env->insn_processed, BPF_COMPLEXITY_LIMIT_INSNS, 10943 env->max_states_per_insn, env->total_states, 10944 env->peak_states, env->longest_mark_read_walk); 10945 } 10946 10947 static int check_struct_ops_btf_id(struct bpf_verifier_env *env) 10948 { 10949 const struct btf_type *t, *func_proto; 10950 const struct bpf_struct_ops *st_ops; 10951 const struct btf_member *member; 10952 struct bpf_prog *prog = env->prog; 10953 u32 btf_id, member_idx; 10954 const char *mname; 10955 10956 btf_id = prog->aux->attach_btf_id; 10957 st_ops = bpf_struct_ops_find(btf_id); 10958 if (!st_ops) { 10959 verbose(env, "attach_btf_id %u is not a supported struct\n", 10960 btf_id); 10961 return -ENOTSUPP; 10962 } 10963 10964 t = st_ops->type; 10965 member_idx = prog->expected_attach_type; 10966 if (member_idx >= btf_type_vlen(t)) { 10967 verbose(env, "attach to invalid member idx %u of struct %s\n", 10968 member_idx, st_ops->name); 10969 return -EINVAL; 10970 } 10971 10972 member = &btf_type_member(t)[member_idx]; 10973 mname = btf_name_by_offset(btf_vmlinux, member->name_off); 10974 func_proto = btf_type_resolve_func_ptr(btf_vmlinux, member->type, 10975 NULL); 10976 if (!func_proto) { 10977 verbose(env, "attach to invalid member %s(@idx %u) of struct %s\n", 10978 mname, member_idx, st_ops->name); 10979 return -EINVAL; 10980 } 10981 10982 if (st_ops->check_member) { 10983 int err = st_ops->check_member(t, member); 10984 10985 if (err) { 10986 verbose(env, "attach to unsupported member %s of struct %s\n", 10987 mname, st_ops->name); 10988 return err; 10989 } 10990 } 10991 10992 prog->aux->attach_func_proto = func_proto; 10993 prog->aux->attach_func_name = mname; 10994 env->ops = st_ops->verifier_ops; 10995 10996 return 0; 10997 } 10998 #define SECURITY_PREFIX "security_" 10999 11000 static int check_attach_modify_return(struct bpf_prog 
*prog, unsigned long addr) 11001 { 11002 if (within_error_injection_list(addr) || 11003 !strncmp(SECURITY_PREFIX, prog->aux->attach_func_name, 11004 sizeof(SECURITY_PREFIX) - 1)) 11005 return 0; 11006 11007 return -EINVAL; 11008 } 11009 11010 /* non exhaustive list of sleepable bpf_lsm_*() functions */ 11011 BTF_SET_START(btf_sleepable_lsm_hooks) 11012 #ifdef CONFIG_BPF_LSM 11013 BTF_ID(func, bpf_lsm_bprm_committed_creds) 11014 #else 11015 BTF_ID_UNUSED 11016 #endif 11017 BTF_SET_END(btf_sleepable_lsm_hooks) 11018 11019 static int check_sleepable_lsm_hook(u32 btf_id) 11020 { 11021 return btf_id_set_contains(&btf_sleepable_lsm_hooks, btf_id); 11022 } 11023 11024 /* list of non-sleepable functions that are otherwise on 11025 * ALLOW_ERROR_INJECTION list 11026 */ 11027 BTF_SET_START(btf_non_sleepable_error_inject) 11028 /* Three functions below can be called from sleepable and non-sleepable context. 11029 * Assume non-sleepable from bpf safety point of view. 11030 */ 11031 BTF_ID(func, __add_to_page_cache_locked) 11032 BTF_ID(func, should_fail_alloc_page) 11033 BTF_ID(func, should_failslab) 11034 BTF_SET_END(btf_non_sleepable_error_inject) 11035 11036 static int check_non_sleepable_error_inject(u32 btf_id) 11037 { 11038 return btf_id_set_contains(&btf_non_sleepable_error_inject, btf_id); 11039 } 11040 11041 static int check_attach_btf_id(struct bpf_verifier_env *env) 11042 { 11043 struct bpf_prog *prog = env->prog; 11044 bool prog_extension = prog->type == BPF_PROG_TYPE_EXT; 11045 struct bpf_prog *tgt_prog = prog->aux->linked_prog; 11046 u32 btf_id = prog->aux->attach_btf_id; 11047 const char prefix[] = "btf_trace_"; 11048 struct btf_func_model fmodel; 11049 int ret = 0, subprog = -1, i; 11050 struct bpf_trampoline *tr; 11051 const struct btf_type *t; 11052 bool conservative = true; 11053 const char *tname; 11054 struct btf *btf; 11055 long addr; 11056 u64 key; 11057 11058 if (prog->aux->sleepable && prog->type != BPF_PROG_TYPE_TRACING && 11059 prog->type != BPF_PROG_TYPE_LSM) { 11060 verbose(env, "Only fentry/fexit/fmod_ret and lsm programs can be sleepable\n"); 11061 return -EINVAL; 11062 } 11063 11064 if (prog->type == BPF_PROG_TYPE_STRUCT_OPS) 11065 return check_struct_ops_btf_id(env); 11066 11067 if (prog->type != BPF_PROG_TYPE_TRACING && 11068 prog->type != BPF_PROG_TYPE_LSM && 11069 !prog_extension) 11070 return 0; 11071 11072 if (!btf_id) { 11073 verbose(env, "Tracing programs must provide btf_id\n"); 11074 return -EINVAL; 11075 } 11076 btf = bpf_prog_get_target_btf(prog); 11077 if (!btf) { 11078 verbose(env, 11079 "FENTRY/FEXIT program can only be attached to another program annotated with BTF\n"); 11080 return -EINVAL; 11081 } 11082 t = btf_type_by_id(btf, btf_id); 11083 if (!t) { 11084 verbose(env, "attach_btf_id %u is invalid\n", btf_id); 11085 return -EINVAL; 11086 } 11087 tname = btf_name_by_offset(btf, t->name_off); 11088 if (!tname) { 11089 verbose(env, "attach_btf_id %u doesn't have a name\n", btf_id); 11090 return -EINVAL; 11091 } 11092 if (tgt_prog) { 11093 struct bpf_prog_aux *aux = tgt_prog->aux; 11094 11095 for (i = 0; i < aux->func_info_cnt; i++) 11096 if (aux->func_info[i].type_id == btf_id) { 11097 subprog = i; 11098 break; 11099 } 11100 if (subprog == -1) { 11101 verbose(env, "Subprog %s doesn't exist\n", tname); 11102 return -EINVAL; 11103 } 11104 conservative = aux->func_info_aux[subprog].unreliable; 11105 if (prog_extension) { 11106 if (conservative) { 11107 verbose(env, 11108 "Cannot replace static functions\n"); 11109 return -EINVAL; 11110 } 11111 if 
(!prog->jit_requested) { 11112 verbose(env, 11113 "Extension programs should be JITed\n"); 11114 return -EINVAL; 11115 } 11116 env->ops = bpf_verifier_ops[tgt_prog->type]; 11117 prog->expected_attach_type = tgt_prog->expected_attach_type; 11118 } 11119 if (!tgt_prog->jited) { 11120 verbose(env, "Can attach to only JITed progs\n"); 11121 return -EINVAL; 11122 } 11123 if (tgt_prog->type == prog->type) { 11124 /* Cannot fentry/fexit another fentry/fexit program. 11125 * Cannot attach a program extension to another extension. 11126 * It's ok to attach fentry/fexit to an extension program. 11127 */ 11128 verbose(env, "Cannot recursively attach\n"); 11129 return -EINVAL; 11130 } 11131 if (tgt_prog->type == BPF_PROG_TYPE_TRACING && 11132 prog_extension && 11133 (tgt_prog->expected_attach_type == BPF_TRACE_FENTRY || 11134 tgt_prog->expected_attach_type == BPF_TRACE_FEXIT)) { 11135 /* Program extensions can extend all program types 11136 * except fentry/fexit. The reason is the following. 11137 * The fentry/fexit programs are used for performance 11138 * analysis and stats, and can be attached to any program 11139 * type except themselves. When an extension program 11140 * replaces an XDP function, it is necessary to allow 11141 * performance analysis of all functions: both the original 11142 * XDP program and its program extension. Hence 11143 * attaching fentry/fexit to BPF_PROG_TYPE_EXT is 11144 * allowed. If extending fentry/fexit were allowed, it 11145 * would be possible to create a long call chain 11146 * fentry->extension->fentry->extension beyond a 11147 * reasonable stack size. Hence extending fentry/fexit is not 11148 * allowed. 11149 */ 11150 verbose(env, "Cannot extend fentry/fexit\n"); 11151 return -EINVAL; 11152 } 11153 key = ((u64)aux->id) << 32 | btf_id; 11154 } else { 11155 if (prog_extension) { 11156 verbose(env, "Cannot replace kernel functions\n"); 11157 return -EINVAL; 11158 } 11159 key = btf_id; 11160 } 11161 11162 switch (prog->expected_attach_type) { 11163 case BPF_TRACE_RAW_TP: 11164 if (tgt_prog) { 11165 verbose(env, 11166 "Only FENTRY/FEXIT progs are attachable to another BPF prog\n"); 11167 return -EINVAL; 11168 } 11169 if (!btf_type_is_typedef(t)) { 11170 verbose(env, "attach_btf_id %u is not a typedef\n", 11171 btf_id); 11172 return -EINVAL; 11173 } 11174 if (strncmp(prefix, tname, sizeof(prefix) - 1)) { 11175 verbose(env, "attach_btf_id %u points to wrong type name %s\n", 11176 btf_id, tname); 11177 return -EINVAL; 11178 } 11179 tname += sizeof(prefix) - 1; 11180 t = btf_type_by_id(btf, t->type); 11181 if (!btf_type_is_ptr(t)) 11182 /* should never happen in a valid vmlinux build */ 11183 return -EINVAL; 11184 t = btf_type_by_id(btf, t->type); 11185 if (!btf_type_is_func_proto(t)) 11186 /* should never happen in a valid vmlinux build */ 11187 return -EINVAL; 11188 11189 /* remember two read-only pointers that are valid for 11190 * the lifetime of the kernel 11191 */ 11192 prog->aux->attach_func_name = tname; 11193 prog->aux->attach_func_proto = t; 11194 prog->aux->attach_btf_trace = true; 11195 return 0; 11196 case BPF_TRACE_ITER: 11197 if (!btf_type_is_func(t)) { 11198 verbose(env, "attach_btf_id %u is not a function\n", 11199 btf_id); 11200 return -EINVAL; 11201 } 11202 t = btf_type_by_id(btf, t->type); 11203 if (!btf_type_is_func_proto(t)) 11204 return -EINVAL; 11205 prog->aux->attach_func_name = tname; 11206 prog->aux->attach_func_proto = t; 11207 if (!bpf_iter_prog_supported(prog)) 11208 return -EINVAL; 11209 ret = btf_distill_func_proto(&env->log, btf, t, 11210 tname, &fmodel); 11211 
return ret; 11212 default: 11213 if (!prog_extension) 11214 return -EINVAL; 11215 /* fallthrough */ 11216 case BPF_MODIFY_RETURN: 11217 case BPF_LSM_MAC: 11218 case BPF_TRACE_FENTRY: 11219 case BPF_TRACE_FEXIT: 11220 prog->aux->attach_func_name = tname; 11221 if (prog->type == BPF_PROG_TYPE_LSM) { 11222 ret = bpf_lsm_verify_prog(&env->log, prog); 11223 if (ret < 0) 11224 return ret; 11225 } 11226 11227 if (!btf_type_is_func(t)) { 11228 verbose(env, "attach_btf_id %u is not a function\n", 11229 btf_id); 11230 return -EINVAL; 11231 } 11232 if (prog_extension && 11233 btf_check_type_match(env, prog, btf, t)) 11234 return -EINVAL; 11235 t = btf_type_by_id(btf, t->type); 11236 if (!btf_type_is_func_proto(t)) 11237 return -EINVAL; 11238 tr = bpf_trampoline_lookup(key); 11239 if (!tr) 11240 return -ENOMEM; 11241 /* t is either vmlinux type or another program's type */ 11242 prog->aux->attach_func_proto = t; 11243 mutex_lock(&tr->mutex); 11244 if (tr->func.addr) { 11245 prog->aux->trampoline = tr; 11246 goto out; 11247 } 11248 if (tgt_prog && conservative) { 11249 prog->aux->attach_func_proto = NULL; 11250 t = NULL; 11251 } 11252 ret = btf_distill_func_proto(&env->log, btf, t, 11253 tname, &tr->func.model); 11254 if (ret < 0) 11255 goto out; 11256 if (tgt_prog) { 11257 if (subprog == 0) 11258 addr = (long) tgt_prog->bpf_func; 11259 else 11260 addr = (long) tgt_prog->aux->func[subprog]->bpf_func; 11261 } else { 11262 addr = kallsyms_lookup_name(tname); 11263 if (!addr) { 11264 verbose(env, 11265 "The address of function %s cannot be found\n", 11266 tname); 11267 ret = -ENOENT; 11268 goto out; 11269 } 11270 } 11271 11272 if (prog->aux->sleepable) { 11273 ret = -EINVAL; 11274 switch (prog->type) { 11275 case BPF_PROG_TYPE_TRACING: 11276 /* fentry/fexit/fmod_ret progs can be sleepable only if they are 11277 * attached to ALLOW_ERROR_INJECTION and are not in denylist. 11278 */ 11279 if (!check_non_sleepable_error_inject(btf_id) && 11280 within_error_injection_list(addr)) 11281 ret = 0; 11282 break; 11283 case BPF_PROG_TYPE_LSM: 11284 /* LSM progs check that they are attached to bpf_lsm_*() funcs. 11285 * Only some of them are sleepable. 
11286 */ 11287 if (check_sleepable_lsm_hook(btf_id)) 11288 ret = 0; 11289 break; 11290 default: 11291 break; 11292 } 11293 if (ret) 11294 verbose(env, "%s is not sleepable\n", 11295 prog->aux->attach_func_name); 11296 } else if (prog->expected_attach_type == BPF_MODIFY_RETURN) { 11297 ret = check_attach_modify_return(prog, addr); 11298 if (ret) 11299 verbose(env, "%s() is not modifiable\n", 11300 prog->aux->attach_func_name); 11301 } 11302 if (ret) 11303 goto out; 11304 tr->func.addr = (void *)addr; 11305 prog->aux->trampoline = tr; 11306 out: 11307 mutex_unlock(&tr->mutex); 11308 if (ret) 11309 bpf_trampoline_put(tr); 11310 return ret; 11311 } 11312 } 11313 11314 int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, 11315 union bpf_attr __user *uattr) 11316 { 11317 u64 start_time = ktime_get_ns(); 11318 struct bpf_verifier_env *env; 11319 struct bpf_verifier_log *log; 11320 int i, len, ret = -EINVAL; 11321 bool is_priv; 11322 11323 /* no program is valid */ 11324 if (ARRAY_SIZE(bpf_verifier_ops) == 0) 11325 return -EINVAL; 11326 11327 /* 'struct bpf_verifier_env' can be global, but since it's not small, 11328 * allocate/free it every time bpf_check() is called 11329 */ 11330 env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL); 11331 if (!env) 11332 return -ENOMEM; 11333 log = &env->log; 11334 11335 len = (*prog)->len; 11336 env->insn_aux_data = 11337 vzalloc(array_size(sizeof(struct bpf_insn_aux_data), len)); 11338 ret = -ENOMEM; 11339 if (!env->insn_aux_data) 11340 goto err_free_env; 11341 for (i = 0; i < len; i++) 11342 env->insn_aux_data[i].orig_idx = i; 11343 env->prog = *prog; 11344 env->ops = bpf_verifier_ops[env->prog->type]; 11345 is_priv = bpf_capable(); 11346 11347 if (!btf_vmlinux && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) { 11348 mutex_lock(&bpf_verifier_lock); 11349 if (!btf_vmlinux) 11350 btf_vmlinux = btf_parse_vmlinux(); 11351 mutex_unlock(&bpf_verifier_lock); 11352 } 11353 11354 /* grab the mutex to protect few globals used by verifier */ 11355 if (!is_priv) 11356 mutex_lock(&bpf_verifier_lock); 11357 11358 if (attr->log_level || attr->log_buf || attr->log_size) { 11359 /* user requested verbose verifier output 11360 * and supplied buffer to store the verification trace 11361 */ 11362 log->level = attr->log_level; 11363 log->ubuf = (char __user *) (unsigned long) attr->log_buf; 11364 log->len_total = attr->log_size; 11365 11366 ret = -EINVAL; 11367 /* log attributes have to be sane */ 11368 if (log->len_total < 128 || log->len_total > UINT_MAX >> 2 || 11369 !log->level || !log->ubuf || log->level & ~BPF_LOG_MASK) 11370 goto err_unlock; 11371 } 11372 11373 if (IS_ERR(btf_vmlinux)) { 11374 /* Either gcc or pahole or kernel are broken. 
*/ 11375 verbose(env, "in-kernel BTF is malformed\n"); 11376 ret = PTR_ERR(btf_vmlinux); 11377 goto skip_full_check; 11378 } 11379 11380 env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT); 11381 if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) 11382 env->strict_alignment = true; 11383 if (attr->prog_flags & BPF_F_ANY_ALIGNMENT) 11384 env->strict_alignment = false; 11385 11386 env->allow_ptr_leaks = bpf_allow_ptr_leaks(); 11387 env->allow_ptr_to_map_access = bpf_allow_ptr_to_map_access(); 11388 env->bypass_spec_v1 = bpf_bypass_spec_v1(); 11389 env->bypass_spec_v4 = bpf_bypass_spec_v4(); 11390 env->bpf_capable = bpf_capable(); 11391 11392 if (is_priv) 11393 env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ; 11394 11395 ret = replace_map_fd_with_map_ptr(env); 11396 if (ret < 0) 11397 goto skip_full_check; 11398 11399 if (bpf_prog_is_dev_bound(env->prog->aux)) { 11400 ret = bpf_prog_offload_verifier_prep(env->prog); 11401 if (ret) 11402 goto skip_full_check; 11403 } 11404 11405 env->explored_states = kvcalloc(state_htab_size(env), 11406 sizeof(struct bpf_verifier_state_list *), 11407 GFP_USER); 11408 ret = -ENOMEM; 11409 if (!env->explored_states) 11410 goto skip_full_check; 11411 11412 ret = check_subprogs(env); 11413 if (ret < 0) 11414 goto skip_full_check; 11415 11416 ret = check_btf_info(env, attr, uattr); 11417 if (ret < 0) 11418 goto skip_full_check; 11419 11420 ret = check_attach_btf_id(env); 11421 if (ret) 11422 goto skip_full_check; 11423 11424 ret = check_cfg(env); 11425 if (ret < 0) 11426 goto skip_full_check; 11427 11428 ret = do_check_subprogs(env); 11429 ret = ret ?: do_check_main(env); 11430 11431 if (ret == 0 && bpf_prog_is_dev_bound(env->prog->aux)) 11432 ret = bpf_prog_offload_finalize(env); 11433 11434 skip_full_check: 11435 kvfree(env->explored_states); 11436 11437 if (ret == 0) 11438 ret = check_max_stack_depth(env); 11439 11440 /* instruction rewrites happen after this point */ 11441 if (is_priv) { 11442 if (ret == 0) 11443 opt_hard_wire_dead_code_branches(env); 11444 if (ret == 0) 11445 ret = opt_remove_dead_code(env); 11446 if (ret == 0) 11447 ret = opt_remove_nops(env); 11448 } else { 11449 if (ret == 0) 11450 sanitize_dead_code(env); 11451 } 11452 11453 if (ret == 0) 11454 /* program is valid, convert *(u32*)(ctx + off) accesses */ 11455 ret = convert_ctx_accesses(env); 11456 11457 if (ret == 0) 11458 ret = fixup_bpf_calls(env); 11459 11460 /* Do the 32-bit optimization after insn patching is done so that the 11461 * patched insns can be handled correctly. 11462 */ 11463 if (ret == 0 && !bpf_prog_is_dev_bound(env->prog->aux)) { 11464 ret = opt_subreg_zext_lo32_rnd_hi32(env, attr); 11465 env->prog->aux->verifier_zext = bpf_jit_needs_zext() ? 
!ret 11466 : false; 11467 } 11468 11469 if (ret == 0) 11470 ret = fixup_call_args(env); 11471 11472 env->verification_time = ktime_get_ns() - start_time; 11473 print_verification_stats(env); 11474 11475 if (log->level && bpf_verifier_log_full(log)) 11476 ret = -ENOSPC; 11477 if (log->level && !log->ubuf) { 11478 ret = -EFAULT; 11479 goto err_release_maps; 11480 } 11481 11482 if (ret == 0 && env->used_map_cnt) { 11483 /* if program passed verifier, update used_maps in bpf_prog_info */ 11484 env->prog->aux->used_maps = kmalloc_array(env->used_map_cnt, 11485 sizeof(env->used_maps[0]), 11486 GFP_KERNEL); 11487 11488 if (!env->prog->aux->used_maps) { 11489 ret = -ENOMEM; 11490 goto err_release_maps; 11491 } 11492 11493 memcpy(env->prog->aux->used_maps, env->used_maps, 11494 sizeof(env->used_maps[0]) * env->used_map_cnt); 11495 env->prog->aux->used_map_cnt = env->used_map_cnt; 11496 11497 /* program is valid. Convert pseudo bpf_ld_imm64 into generic 11498 * bpf_ld_imm64 instructions 11499 */ 11500 convert_pseudo_ld_imm64(env); 11501 } 11502 11503 if (ret == 0) 11504 adjust_btf_func(env); 11505 11506 err_release_maps: 11507 if (!env->prog->aux->used_maps) 11508 /* if we didn't copy map pointers into bpf_prog_info, release 11509 * them now. Otherwise free_used_maps() will release them. 11510 */ 11511 release_maps(env); 11512 11513 /* extension progs temporarily inherit the attach_type of their targets 11514 for verification purposes, so set it back to zero before returning 11515 */ 11516 if (env->prog->type == BPF_PROG_TYPE_EXT) 11517 env->prog->expected_attach_type = 0; 11518 11519 *prog = env->prog; 11520 err_unlock: 11521 if (!is_priv) 11522 mutex_unlock(&bpf_verifier_lock); 11523 vfree(env->insn_aux_data); 11524 err_free_env: 11525 kfree(env); 11526 return ret; 11527 } 11528
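/* Illustration of the map helper inlining done in fixup_bpf_calls() above.
 * When the JIT is requested on a 64-bit host and the map type provides
 * ->map_gen_lookup(), the call to bpf_map_lookup_elem() is replaced by a
 * short inline instruction sequence instead of a real helper call. The
 * sketch below is only a rough picture of what such a sequence does for an
 * array map; the exact instructions are chosen by the map's
 * ->map_gen_lookup() callback and vary by map type and kernel version:
 *
 *	r0 = *(u32 *)(r2 + 0)		// load the index from the key pointer
 *	if r0 >= max_entries goto miss
 *	r0 *= round_up(value_size, 8)	// scale to the element stride
 *	r0 += &array->value[0]		// r0 now points at the element
 *	goto done
 * miss:
 *	r0 = 0				// NULL, as the helper would return
 * done:
 *
 * If no inline sequence is generated, insn->imm is simply rewritten to the
 * offset of the map's ->map_lookup_elem() (or update/delete/push/pop/peek)
 * callback from __bpf_call_base, as the switch in fixup_bpf_calls() shows.
 */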
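/* Illustration of the verifier log setup checked in bpf_check() above. The
 * log is only used when the loader passes log_level, log_buf and log_size
 * together, and the attributes must be sane: a non-zero level within
 * BPF_LOG_MASK, a non-NULL buffer, and a size of at least 128 bytes (and at
 * most UINT_MAX >> 2). A minimal userspace sketch (names and sizes are
 * arbitrary, error handling omitted):
 *
 *	char log[4096];
 *	union bpf_attr attr = {};
 *
 *	attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
 *	attr.insns     = (__u64)(unsigned long)insns;
 *	attr.insn_cnt  = insn_cnt;
 *	attr.license   = (__u64)(unsigned long)"GPL";
 *	attr.log_level = 1;				// non-zero, within BPF_LOG_MASK
 *	attr.log_buf   = (__u64)(unsigned long)log;	// non-NULL buffer
 *	attr.log_size  = sizeof(log);			// >= 128 bytes
 *
 *	fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
 *	if (fd < 0)
 *		fprintf(stderr, "verifier says:\n%s\n", log);
 *
 * On failure the buffer holds the verification trace; if the trace did not
 * fit, bpf_check() returns -ENOSPC as seen above.
 */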