// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 * Copyright (c) 2016 Facebook
 * Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
 */
#include <uapi/linux/btf.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/bpf_verifier.h>
#include <linux/filter.h>
#include <net/netlink.h>
#include <linux/file.h>
#include <linux/vmalloc.h>
#include <linux/stringify.h>
#include <linux/bsearch.h>
#include <linux/sort.h>
#include <linux/perf_event.h>
#include <linux/ctype.h>

#include "disasm.h"

static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
	[_id] = & _name ## _verifier_ops,
#define BPF_MAP_TYPE(_id, _ops)
#include <linux/bpf_types.h>
#undef BPF_PROG_TYPE
#undef BPF_MAP_TYPE
};

/* bpf_check() is a static code analyzer that walks eBPF program
 * instruction by instruction and updates register/stack state.
 * All paths of conditional branches are analyzed until 'bpf_exit' insn.
 *
 * The first pass is depth-first-search to check that the program is a DAG.
 * It rejects the following programs:
 * - larger than BPF_MAXINSNS insns
 * - if loop is present (detected via back-edge)
 * - unreachable insns exist (shouldn't be a forest. program = one function)
 * - out of bounds or malformed jumps
 * The second pass is all possible path descent from the 1st insn.
 * Since it's analyzing all paths through the program, the length of the
 * analysis is limited to 64k insn, which may be hit even if total number of
 * insn is less than 4K, but there are too many branches that change stack/regs.
 * Number of 'branches to be analyzed' is limited to 1k
 *
 * On entry to each instruction, each register has a type, and the instruction
 * changes the types of the registers depending on instruction semantics.
 * If instruction is BPF_MOV64_REG(BPF_REG_1, BPF_REG_5), then type of R5 is
 * copied to R1.
 *
 * All registers are 64-bit.
 * R0 - return register
 * R1-R5 argument passing registers
 * R6-R9 callee saved registers
 * R10 - frame pointer read-only
 *
 * At the start of BPF program the register R1 contains a pointer to bpf_context
 * and has type PTR_TO_CTX.
 *
 * Verifier tracks arithmetic operations on pointers in case:
 *    BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
 *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -20),
 * 1st insn copies R10 (which has FRAME_PTR) type into R1
 * and 2nd arithmetic instruction is pattern matched to recognize
 * that it wants to construct a pointer to some element within stack.
 * So after 2nd insn, the register R1 has type PTR_TO_STACK
 * (and -20 constant is saved for further stack bounds checking).
 * Meaning that this reg is a pointer to stack plus known immediate constant.
 *
 * Most of the time the registers have SCALAR_VALUE type, which
 * means the register has some value, but it's not a valid pointer.
 * (like pointer plus pointer becomes SCALAR_VALUE type)
 *
 * When verifier sees load or store instructions the type of base register
 * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK, PTR_TO_SOCKET. These are
 * four pointer types recognized by check_mem_access() function.
 *
 * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
 * and the range of [ptr, ptr + map's value_size) is accessible.
 *
 * registers used to pass values to function calls are checked against
 * function argument constraints.
 *
 * ARG_PTR_TO_MAP_KEY is one of such argument constraints.
 * It means that the register type passed to this function must be
 * PTR_TO_STACK and it will be used inside the function as
 * 'pointer to map element key'
 *
 * For example the argument constraints for bpf_map_lookup_elem():
 *   .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
 *   .arg1_type = ARG_CONST_MAP_PTR,
 *   .arg2_type = ARG_PTR_TO_MAP_KEY,
 *
 * ret_type says that this function returns 'pointer to map elem value or null'
 * function expects 1st argument to be a const pointer to 'struct bpf_map' and
 * 2nd argument should be a pointer to stack, which will be used inside
 * the helper function as a pointer to map element key.
 *
 * On the kernel side the helper function looks like:
 * u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
 * {
 *    struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
 *    void *key = (void *) (unsigned long) r2;
 *    void *value;
 *
 *    here kernel can access 'key' and 'map' pointers safely, knowing that
 *    [key, key + map->key_size) bytes are valid and were initialized on
 *    the stack of eBPF program.
 * }
 *
 * Corresponding eBPF program may look like:
 *    BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),  // after this insn R2 type is FRAME_PTR
 *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), // after this insn R2 type is PTR_TO_STACK
 *    BPF_LD_MAP_FD(BPF_REG_1, map_fd),      // after this insn R1 type is CONST_PTR_TO_MAP
 *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
 * here verifier looks at prototype of map_lookup_elem() and sees:
 * .arg1_type == ARG_CONST_MAP_PTR and R1->type == CONST_PTR_TO_MAP, which is ok,
 * Now verifier knows that this map has key of R1->map_ptr->key_size bytes
 *
 * Then .arg2_type == ARG_PTR_TO_MAP_KEY and R2->type == PTR_TO_STACK, ok so far,
 * Now verifier checks that [R2, R2 + map's key_size) are within stack limits
 * and were initialized prior to this call.
 * If it's ok, then verifier allows this BPF_CALL insn and looks at
 * .ret_type which is RET_PTR_TO_MAP_VALUE_OR_NULL, so it sets
 * R0->type = PTR_TO_MAP_VALUE_OR_NULL which means bpf_map_lookup_elem() function
 * returns either pointer to map value or NULL.
 *
 * When type PTR_TO_MAP_VALUE_OR_NULL passes through 'if (reg != 0) goto +off'
 * insn, the register holding that pointer in the true branch changes state to
 * PTR_TO_MAP_VALUE and the same register changes state to CONST_IMM in the false
 * branch. See check_cond_jmp_op().
 *
 * After the call R0 is set to return type of the function and registers R1-R5
 * are set to NOT_INIT to indicate that they are no longer readable.
 *
 * The following reference types represent a potential reference to a kernel
 * resource which, after first being allocated, must be checked and freed by
 * the BPF program:
 * - PTR_TO_SOCKET_OR_NULL, PTR_TO_SOCKET
 *
 * When the verifier sees a helper call return a reference type, it allocates a
 * pointer id for the reference and stores it in the current function state.
 * Similar to the way that PTR_TO_MAP_VALUE_OR_NULL is converted into
 * PTR_TO_MAP_VALUE, PTR_TO_SOCKET_OR_NULL becomes PTR_TO_SOCKET when the type
 * passes through a NULL-check conditional. For the branch wherein the state is
 * changed to CONST_IMM, the verifier releases the reference.
 *
 * For each helper function that allocates a reference, such as
 * bpf_sk_lookup_tcp(), there is a corresponding release function, such as
 * bpf_sk_release(). When a reference type passes into the release function,
 * the verifier also releases the reference. If any unchecked or unreleased
 * reference remains at the end of the program, the verifier rejects it.
 */
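
/* For illustration, a sketch (not taken from the original comment above) of the
 * acquire / NULL-check / release pattern just described. The stack offset, the
 * tuple preparation and the use of BPF_FUNC_sk_lookup_tcp are assumptions made
 * only for this example:
 *
 *    BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),       // r2 = fp (r1 still holds ctx)
 *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -48),     // r2 = fp - 48, where a tuple was prepared
 *    BPF_MOV64_IMM(BPF_REG_3, sizeof(struct bpf_sock_tuple)), // tuple size
 *    BPF_MOV64_IMM(BPF_REG_4, 0),                // netns
 *    BPF_MOV64_IMM(BPF_REG_5, 0),                // flags
 *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_sk_lookup_tcp),
 *                                                // R0 is now PTR_TO_SOCKET_OR_NULL
 *    BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),      // NULL branch: verifier releases the reference
 *    BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),        // non-NULL branch: R0 is PTR_TO_SOCKET here
 *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_sk_release),
 *    BPF_EXIT_INSN(),                            // no unreleased references remain
 */
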
/* verifier_state + insn_idx are pushed to stack when branch is encountered */
struct bpf_verifier_stack_elem {
	/* verifier state is 'st'
	 * before processing instruction 'insn_idx'
	 * and after processing instruction 'prev_insn_idx'
	 */
	struct bpf_verifier_state st;
	int insn_idx;
	int prev_insn_idx;
	struct bpf_verifier_stack_elem *next;
};

#define BPF_COMPLEXITY_LIMIT_JMP_SEQ	8192
#define BPF_COMPLEXITY_LIMIT_STATES	64

#define BPF_MAP_KEY_POISON	(1ULL << 63)
#define BPF_MAP_KEY_SEEN	(1ULL << 62)

#define BPF_MAP_PTR_UNPRIV	1UL
#define BPF_MAP_PTR_POISON	((void *)((0xeB9FUL << 1) +	\
					  POISON_POINTER_DELTA))
#define BPF_MAP_PTR(X)		((struct bpf_map *)((X) & ~BPF_MAP_PTR_UNPRIV))

static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux)
{
	return BPF_MAP_PTR(aux->map_ptr_state) == BPF_MAP_PTR_POISON;
}

static bool bpf_map_ptr_unpriv(const struct bpf_insn_aux_data *aux)
{
	return aux->map_ptr_state & BPF_MAP_PTR_UNPRIV;
}

static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux,
			      const struct bpf_map *map, bool unpriv)
{
	BUILD_BUG_ON((unsigned long)BPF_MAP_PTR_POISON & BPF_MAP_PTR_UNPRIV);
	unpriv |= bpf_map_ptr_unpriv(aux);
	aux->map_ptr_state = (unsigned long)map |
			     (unpriv ? BPF_MAP_PTR_UNPRIV : 0UL);
}

static bool bpf_map_key_poisoned(const struct bpf_insn_aux_data *aux)
{
	return aux->map_key_state & BPF_MAP_KEY_POISON;
}

static bool bpf_map_key_unseen(const struct bpf_insn_aux_data *aux)
{
	return !(aux->map_key_state & BPF_MAP_KEY_SEEN);
}

static u64 bpf_map_key_immediate(const struct bpf_insn_aux_data *aux)
{
	return aux->map_key_state & ~(BPF_MAP_KEY_SEEN | BPF_MAP_KEY_POISON);
}

static void bpf_map_key_store(struct bpf_insn_aux_data *aux, u64 state)
{
	bool poisoned = bpf_map_key_poisoned(aux);

	aux->map_key_state = state | BPF_MAP_KEY_SEEN |
			     (poisoned ? BPF_MAP_KEY_POISON : 0ULL);
}

struct bpf_call_arg_meta {
	struct bpf_map *map_ptr;
	bool raw_mode;
	bool pkt_access;
	int regno;
	int access_size;
	s64 msize_smax_value;
	u64 msize_umax_value;
	int ref_obj_id;
	int func_id;
	u32 btf_id;
};

struct btf *btf_vmlinux;

static DEFINE_MUTEX(bpf_verifier_lock);

static const struct bpf_line_info *
find_linfo(const struct bpf_verifier_env *env, u32 insn_off)
{
	const struct bpf_line_info *linfo;
	const struct bpf_prog *prog;
	u32 i, nr_linfo;

	prog = env->prog;
	nr_linfo = prog->aux->nr_linfo;

	if (!nr_linfo || insn_off >= prog->len)
		return NULL;

	linfo = prog->aux->linfo;
	for (i = 1; i < nr_linfo; i++)
		if (insn_off < linfo[i].insn_off)
			break;

	return &linfo[i - 1];
}

void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt,
		       va_list args)
{
	unsigned int n;

	n = vscnprintf(log->kbuf, BPF_VERIFIER_TMP_LOG_SIZE, fmt, args);

	WARN_ONCE(n >= BPF_VERIFIER_TMP_LOG_SIZE - 1,
		  "verifier log line truncated - local buffer too short\n");

	n = min(log->len_total - log->len_used - 1, n);
	log->kbuf[n] = '\0';

	if (log->level == BPF_LOG_KERNEL) {
		pr_err("BPF:%s\n", log->kbuf);
		return;
	}
	if (!copy_to_user(log->ubuf + log->len_used, log->kbuf, n + 1))
		log->len_used += n;
	else
		log->ubuf = NULL;
}

/* log_level controls verbosity level of eBPF verifier.
 * bpf_verifier_log_write() is used to dump the verification trace to the log,
 * so the user can figure out what's wrong with the program
 */
__printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
					   const char *fmt, ...)
{
	va_list args;

	if (!bpf_verifier_log_needed(&env->log))
		return;

	va_start(args, fmt);
	bpf_verifier_vlog(&env->log, fmt, args);
	va_end(args);
}
EXPORT_SYMBOL_GPL(bpf_verifier_log_write);

__printf(2, 3) static void verbose(void *private_data, const char *fmt, ...)
{
	struct bpf_verifier_env *env = private_data;
	va_list args;

	if (!bpf_verifier_log_needed(&env->log))
		return;

	va_start(args, fmt);
	bpf_verifier_vlog(&env->log, fmt, args);
	va_end(args);
}

__printf(2, 3) void bpf_log(struct bpf_verifier_log *log,
			    const char *fmt, ...)
{
	va_list args;

	if (!bpf_verifier_log_needed(log))
		return;

	va_start(args, fmt);
	bpf_verifier_vlog(log, fmt, args);
	va_end(args);
}

static const char *ltrim(const char *s)
{
	while (isspace(*s))
		s++;

	return s;
}

__printf(3, 4) static void verbose_linfo(struct bpf_verifier_env *env,
					 u32 insn_off,
					 const char *prefix_fmt, ...)
{
	const struct bpf_line_info *linfo;

	if (!bpf_verifier_log_needed(&env->log))
		return;

	linfo = find_linfo(env, insn_off);
	if (!linfo || linfo == env->prev_linfo)
		return;

	if (prefix_fmt) {
		va_list args;

		va_start(args, prefix_fmt);
		bpf_verifier_vlog(&env->log, prefix_fmt, args);
		va_end(args);
	}

	verbose(env, "%s\n",
		ltrim(btf_name_by_offset(env->prog->aux->btf,
					 linfo->line_off)));

	env->prev_linfo = linfo;
}

static bool type_is_pkt_pointer(enum bpf_reg_type type)
{
	return type == PTR_TO_PACKET ||
	       type == PTR_TO_PACKET_META;
}

static bool type_is_sk_pointer(enum bpf_reg_type type)
{
	return type == PTR_TO_SOCKET ||
		type == PTR_TO_SOCK_COMMON ||
		type == PTR_TO_TCP_SOCK ||
		type == PTR_TO_XDP_SOCK;
}

static bool reg_type_may_be_null(enum bpf_reg_type type)
{
	return type == PTR_TO_MAP_VALUE_OR_NULL ||
	       type == PTR_TO_SOCKET_OR_NULL ||
	       type == PTR_TO_SOCK_COMMON_OR_NULL ||
	       type == PTR_TO_TCP_SOCK_OR_NULL;
}

static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
{
	return reg->type == PTR_TO_MAP_VALUE &&
		map_value_has_spin_lock(reg->map_ptr);
}

static bool reg_type_may_be_refcounted_or_null(enum bpf_reg_type type)
{
	return type == PTR_TO_SOCKET ||
		type == PTR_TO_SOCKET_OR_NULL ||
		type == PTR_TO_TCP_SOCK ||
		type == PTR_TO_TCP_SOCK_OR_NULL;
}

static bool arg_type_may_be_refcounted(enum bpf_arg_type type)
{
	return type == ARG_PTR_TO_SOCK_COMMON;
}

/* Determine whether the function releases some resources allocated by another
 * function call. The first reference type argument will be assumed to be
 * released by release_reference().
 */
static bool is_release_function(enum bpf_func_id func_id)
{
	return func_id == BPF_FUNC_sk_release;
}

static bool is_acquire_function(enum bpf_func_id func_id)
{
	return func_id == BPF_FUNC_sk_lookup_tcp ||
		func_id == BPF_FUNC_sk_lookup_udp ||
		func_id == BPF_FUNC_skc_lookup_tcp;
}

static bool is_ptr_cast_function(enum bpf_func_id func_id)
{
	return func_id == BPF_FUNC_tcp_sock ||
		func_id == BPF_FUNC_sk_fullsock;
}

/* string representation of 'enum bpf_reg_type' */
static const char * const reg_type_str[] = {
	[NOT_INIT]		= "?",
	[SCALAR_VALUE]		= "inv",
	[PTR_TO_CTX]		= "ctx",
	[CONST_PTR_TO_MAP]	= "map_ptr",
	[PTR_TO_MAP_VALUE]	= "map_value",
	[PTR_TO_MAP_VALUE_OR_NULL] = "map_value_or_null",
	[PTR_TO_STACK]		= "fp",
	[PTR_TO_PACKET]		= "pkt",
	[PTR_TO_PACKET_META]	= "pkt_meta",
	[PTR_TO_PACKET_END]	= "pkt_end",
	[PTR_TO_FLOW_KEYS]	= "flow_keys",
	[PTR_TO_SOCKET]		= "sock",
	[PTR_TO_SOCKET_OR_NULL] = "sock_or_null",
	[PTR_TO_SOCK_COMMON]	= "sock_common",
	[PTR_TO_SOCK_COMMON_OR_NULL] = "sock_common_or_null",
	[PTR_TO_TCP_SOCK]	= "tcp_sock",
	[PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null",
	[PTR_TO_TP_BUFFER]	= "tp_buffer",
	[PTR_TO_XDP_SOCK]	= "xdp_sock",
	[PTR_TO_BTF_ID]		= "ptr_",
};

static char slot_type_char[] = {
	[STACK_INVALID]	= '?',
	[STACK_SPILL]	= 'r',
	[STACK_MISC]	= 'm',
	[STACK_ZERO]	= '0',
};

static void print_liveness(struct bpf_verifier_env *env,
			   enum bpf_reg_liveness live)
{
	if (live & (REG_LIVE_READ | REG_LIVE_WRITTEN | REG_LIVE_DONE))
		verbose(env, "_");
	if (live & REG_LIVE_READ)
		verbose(env, "r");
	if (live & REG_LIVE_WRITTEN)
		verbose(env, "w");
	if (live & REG_LIVE_DONE)
		verbose(env, "D");
}

static struct bpf_func_state *func(struct bpf_verifier_env *env,
				   const struct bpf_reg_state *reg)
{
	struct bpf_verifier_state *cur = env->cur_state;

	return cur->frame[reg->frameno];
}

const char *kernel_type_name(u32 id)
{
	return btf_name_by_offset(btf_vmlinux,
				  btf_type_by_id(btf_vmlinux, id)->name_off);
}
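
/* For orientation, a hypothetical example of the state line produced by
 * print_verifier_state() below (exact fields depend on the register types):
 *
 *    R1=ctx(id=0,off=0,imm=0) R6_w=inv42 R10=fp0 fp-8=mmmmmmmm
 *
 * The suffix after the register number comes from print_liveness() ('r' read,
 * 'w' written, 'D' done) and the per-byte stack slot characters come from
 * slot_type_char[] above.
 */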
static void print_verifier_state(struct bpf_verifier_env *env,
				 const struct bpf_func_state *state)
{
	const struct bpf_reg_state *reg;
	enum bpf_reg_type t;
	int i;

	if (state->frameno)
		verbose(env, " frame%d:", state->frameno);
	for (i = 0; i < MAX_BPF_REG; i++) {
		reg = &state->regs[i];
		t = reg->type;
		if (t == NOT_INIT)
			continue;
		verbose(env, " R%d", i);
		print_liveness(env, reg->live);
		verbose(env, "=%s", reg_type_str[t]);
		if (t == SCALAR_VALUE && reg->precise)
			verbose(env, "P");
		if ((t == SCALAR_VALUE || t == PTR_TO_STACK) &&
		    tnum_is_const(reg->var_off)) {
			/* reg->off should be 0 for SCALAR_VALUE */
			verbose(env, "%lld", reg->var_off.value + reg->off);
		} else {
			if (t == PTR_TO_BTF_ID)
				verbose(env, "%s", kernel_type_name(reg->btf_id));
			verbose(env, "(id=%d", reg->id);
			if (reg_type_may_be_refcounted_or_null(t))
				verbose(env, ",ref_obj_id=%d", reg->ref_obj_id);
			if (t != SCALAR_VALUE)
				verbose(env, ",off=%d", reg->off);
			if (type_is_pkt_pointer(t))
				verbose(env, ",r=%d", reg->range);
			else if (t == CONST_PTR_TO_MAP ||
				 t == PTR_TO_MAP_VALUE ||
				 t == PTR_TO_MAP_VALUE_OR_NULL)
				verbose(env, ",ks=%d,vs=%d",
					reg->map_ptr->key_size,
					reg->map_ptr->value_size);
			if (tnum_is_const(reg->var_off)) {
				/* Typically an immediate SCALAR_VALUE, but
				 * could be a pointer whose offset is too big
				 * for reg->off
				 */
				verbose(env, ",imm=%llx", reg->var_off.value);
			} else {
				if (reg->smin_value != reg->umin_value &&
				    reg->smin_value != S64_MIN)
					verbose(env, ",smin_value=%lld",
						(long long)reg->smin_value);
				if (reg->smax_value != reg->umax_value &&
				    reg->smax_value != S64_MAX)
					verbose(env, ",smax_value=%lld",
						(long long)reg->smax_value);
				if (reg->umin_value != 0)
					verbose(env, ",umin_value=%llu",
						(unsigned long long)reg->umin_value);
				if (reg->umax_value != U64_MAX)
					verbose(env, ",umax_value=%llu",
						(unsigned long long)reg->umax_value);
				if (!tnum_is_unknown(reg->var_off)) {
					char tn_buf[48];

					tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
					verbose(env, ",var_off=%s", tn_buf);
				}
			}
			verbose(env, ")");
		}
	}
	for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
		char types_buf[BPF_REG_SIZE + 1];
		bool valid = false;
		int j;

		for (j = 0; j < BPF_REG_SIZE; j++) {
			if (state->stack[i].slot_type[j] != STACK_INVALID)
				valid = true;
			types_buf[j] = slot_type_char[
					state->stack[i].slot_type[j]];
		}
		types_buf[BPF_REG_SIZE] = 0;
		if (!valid)
			continue;
		verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
		print_liveness(env, state->stack[i].spilled_ptr.live);
		if (state->stack[i].slot_type[0] == STACK_SPILL) {
			reg = &state->stack[i].spilled_ptr;
			t = reg->type;
			verbose(env, "=%s", reg_type_str[t]);
			if (t == SCALAR_VALUE && reg->precise)
				verbose(env, "P");
			if (t == SCALAR_VALUE && tnum_is_const(reg->var_off))
				verbose(env, "%lld", reg->var_off.value + reg->off);
		} else {
			verbose(env, "=%s", types_buf);
		}
	}
	if (state->acquired_refs && state->refs[0].id) {
		verbose(env, " refs=%d", state->refs[0].id);
		for (i = 1; i < state->acquired_refs; i++)
			if (state->refs[i].id)
				verbose(env, ",%d", state->refs[i].id);
	}
	verbose(env, "\n");
}

#define COPY_STATE_FN(NAME, COUNT, FIELD, SIZE)				\
static int copy_##NAME##_state(struct bpf_func_state *dst,		\
			       const struct bpf_func_state *src)	\
{									\
	if (!src->FIELD)						\
		return 0;						\
	if (WARN_ON_ONCE(dst->COUNT < src->COUNT)) {			\
		/* internal bug, make state invalid to reject the program */ \
		memset(dst, 0, sizeof(*dst));				\
		return -EFAULT;						\
	}								\
	memcpy(dst->FIELD, src->FIELD,					\
	       sizeof(*src->FIELD) * (src->COUNT / SIZE));		\
	return 0;							\
}
/* copy_reference_state() */
COPY_STATE_FN(reference, acquired_refs, refs, 1)
/* copy_stack_state() */
COPY_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE)
#undef COPY_STATE_FN

#define REALLOC_STATE_FN(NAME, COUNT, FIELD, SIZE)			\
static int realloc_##NAME##_state(struct bpf_func_state *state, int size, \
				  bool copy_old)			\
{									\
	u32 old_size = state->COUNT;					\
	struct bpf_##NAME##_state *new_##FIELD;				\
	int slot = size / SIZE;						\
									\
	if (size <= old_size || !size) {				\
		if (copy_old)						\
			return 0;					\
		state->COUNT = slot * SIZE;				\
		if (!size && old_size) {				\
			kfree(state->FIELD);				\
			state->FIELD = NULL;				\
		}							\
		return 0;						\
	}								\
	new_##FIELD = kmalloc_array(slot, sizeof(struct bpf_##NAME##_state), \
				    GFP_KERNEL);			\
	if (!new_##FIELD)						\
		return -ENOMEM;						\
	if (copy_old) {							\
		if (state->FIELD)					\
			memcpy(new_##FIELD, state->FIELD,		\
			       sizeof(*new_##FIELD) * (old_size / SIZE)); \
		memset(new_##FIELD + old_size / SIZE, 0,		\
		       sizeof(*new_##FIELD) * (size - old_size) / SIZE); \
	}								\
	state->COUNT = slot * SIZE;					\
	kfree(state->FIELD);						\
	state->FIELD = new_##FIELD;					\
	return 0;							\
}
/* realloc_reference_state() */
REALLOC_STATE_FN(reference, acquired_refs, refs, 1)
/* realloc_stack_state() */
REALLOC_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE)
#undef REALLOC_STATE_FN

/* do_check() starts with zero-sized stack in struct bpf_verifier_state to
 * make it consume minimal amount of memory. Stack accesses from the program
 * cause check_stack_write() to call into realloc_func_state() to grow the
 * stack size.
 * Note there is a non-zero 'parent' pointer inside bpf_verifier_state
 * which realloc_stack_state() copies over. It points to previous
 * bpf_verifier_state which is never reallocated.
 */
static int realloc_func_state(struct bpf_func_state *state, int stack_size,
			      int refs_size, bool copy_old)
{
	int err = realloc_reference_state(state, refs_size, copy_old);
	if (err)
		return err;
	return realloc_stack_state(state, stack_size, copy_old);
}

/* Acquire a pointer id from the env and update the state->refs to include
 * this new pointer reference.
 * On success, returns a valid pointer id to associate with the register
 * On failure, returns a negative errno.
 */
static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx)
{
	struct bpf_func_state *state = cur_func(env);
	int new_ofs = state->acquired_refs;
	int id, err;

	err = realloc_reference_state(state, state->acquired_refs + 1, true);
	if (err)
		return err;
	id = ++env->id_gen;
	state->refs[new_ofs].id = id;
	state->refs[new_ofs].insn_idx = insn_idx;

	return id;
}
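
/* A rough sketch (not a verbatim copy of the helper-call handling elsewhere in
 * this file) of how the id returned above is typically consumed when an
 * acquire-style helper such as bpf_sk_lookup_tcp() returns:
 *
 *	id = acquire_reference_state(env, insn_idx);
 *	if (id < 0)
 *		return id;
 *	regs[BPF_REG_0].id = id;		// for the NULL-check handling
 *	regs[BPF_REG_0].ref_obj_id = id;	// for release_reference()
 *
 * so both the NULL-check and the eventual bpf_sk_release() can find the
 * reference again.
 */
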
/* release function corresponding to acquire_reference_state(). Idempotent. */
static int release_reference_state(struct bpf_func_state *state, int ptr_id)
{
	int i, last_idx;

	last_idx = state->acquired_refs - 1;
	for (i = 0; i < state->acquired_refs; i++) {
		if (state->refs[i].id == ptr_id) {
			if (last_idx && i != last_idx)
				memcpy(&state->refs[i], &state->refs[last_idx],
				       sizeof(*state->refs));
			memset(&state->refs[last_idx], 0, sizeof(*state->refs));
			state->acquired_refs--;
			return 0;
		}
	}
	return -EINVAL;
}

static int transfer_reference_state(struct bpf_func_state *dst,
				    struct bpf_func_state *src)
{
	int err = realloc_reference_state(dst, src->acquired_refs, false);
	if (err)
		return err;
	err = copy_reference_state(dst, src);
	if (err)
		return err;
	return 0;
}

static void free_func_state(struct bpf_func_state *state)
{
	if (!state)
		return;
	kfree(state->refs);
	kfree(state->stack);
	kfree(state);
}

static void clear_jmp_history(struct bpf_verifier_state *state)
{
	kfree(state->jmp_history);
	state->jmp_history = NULL;
	state->jmp_history_cnt = 0;
}

static void free_verifier_state(struct bpf_verifier_state *state,
				bool free_self)
{
	int i;

	for (i = 0; i <= state->curframe; i++) {
		free_func_state(state->frame[i]);
		state->frame[i] = NULL;
	}
	clear_jmp_history(state);
	if (free_self)
		kfree(state);
}

/* copy verifier state from src to dst growing dst stack space
 * when necessary to accommodate larger src stack
 */
static int copy_func_state(struct bpf_func_state *dst,
			   const struct bpf_func_state *src)
{
	int err;

	err = realloc_func_state(dst, src->allocated_stack, src->acquired_refs,
				 false);
	if (err)
		return err;
	memcpy(dst, src, offsetof(struct bpf_func_state, acquired_refs));
	err = copy_reference_state(dst, src);
	if (err)
		return err;
	return copy_stack_state(dst, src);
}

static int copy_verifier_state(struct bpf_verifier_state *dst_state,
			       const struct bpf_verifier_state *src)
{
	struct bpf_func_state *dst;
	u32 jmp_sz = sizeof(struct bpf_idx_pair) * src->jmp_history_cnt;
	int i, err;

	if (dst_state->jmp_history_cnt < src->jmp_history_cnt) {
		kfree(dst_state->jmp_history);
		dst_state->jmp_history = kmalloc(jmp_sz, GFP_USER);
		if (!dst_state->jmp_history)
			return -ENOMEM;
	}
	memcpy(dst_state->jmp_history, src->jmp_history, jmp_sz);
	dst_state->jmp_history_cnt = src->jmp_history_cnt;

	/* if dst has more stack frames than src frame, free them */
	for (i = src->curframe + 1; i <= dst_state->curframe; i++) {
		free_func_state(dst_state->frame[i]);
		dst_state->frame[i] = NULL;
	}
	dst_state->speculative = src->speculative;
	dst_state->curframe = src->curframe;
	dst_state->active_spin_lock = src->active_spin_lock;
	dst_state->branches = src->branches;
	dst_state->parent = src->parent;
	dst_state->first_insn_idx = src->first_insn_idx;
	dst_state->last_insn_idx = src->last_insn_idx;
	for (i = 0; i <= src->curframe; i++) {
		dst = dst_state->frame[i];
		if (!dst) {
			dst = kzalloc(sizeof(*dst), GFP_KERNEL);
			if (!dst)
				return -ENOMEM;
			dst_state->frame[i] = dst;
		}
		err = copy_func_state(dst, src->frame[i]);
		if (err)
			return err;
	}
	return 0;
}

static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
{
	while (st) {
		u32 br = --st->branches;

		/* WARN_ON(br > 1) technically makes sense here,
		 * but see comment in push_stack(), hence:
		 */
		WARN_ONCE((int)br < 0,
			  "BUG update_branch_counts:branches_to_explore=%d\n",
			  br);
		if (br)
			break;
		st = st->parent;
	}
}

static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
		     int *insn_idx)
{
	struct bpf_verifier_state *cur = env->cur_state;
	struct bpf_verifier_stack_elem *elem, *head = env->head;
	int err;

	if (env->head == NULL)
		return -ENOENT;

	if (cur) {
		err = copy_verifier_state(cur, &head->st);
		if (err)
			return err;
	}
	if (insn_idx)
		*insn_idx = head->insn_idx;
	if (prev_insn_idx)
		*prev_insn_idx = head->prev_insn_idx;
	elem = head->next;
	free_verifier_state(&head->st, false);
	kfree(head);
	env->head = elem;
	env->stack_size--;
	return 0;
}

static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
					     int insn_idx, int prev_insn_idx,
					     bool speculative)
{
	struct bpf_verifier_state *cur = env->cur_state;
	struct bpf_verifier_stack_elem *elem;
	int err;

	elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
	if (!elem)
		goto err;

	elem->insn_idx = insn_idx;
	elem->prev_insn_idx = prev_insn_idx;
	elem->next = env->head;
	env->head = elem;
	env->stack_size++;
	err = copy_verifier_state(&elem->st, cur);
	if (err)
		goto err;
	elem->st.speculative |= speculative;
	if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
		verbose(env, "The sequence of %d jumps is too complex.\n",
			env->stack_size);
		goto err;
	}
	if (elem->st.parent) {
		++elem->st.parent->branches;
		/* WARN_ON(branches > 2) technically makes sense here,
		 * but
		 * 1. speculative states will bump 'branches' for non-branch
		 *    instructions
		 * 2. is_state_visited() heuristics may decide not to create
		 *    a new state for a sequence of branches and all such current
		 *    and cloned states will be pointing to a single parent state
		 *    which might have large 'branches' count.
		 */
	}
	return &elem->st;
err:
	free_verifier_state(env->cur_state, true);
	env->cur_state = NULL;
	/* pop all elements and return */
	while (!pop_stack(env, NULL, NULL));
	return NULL;
}

#define CALLER_SAVED_REGS 6
static const int caller_saved[CALLER_SAVED_REGS] = {
	BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
};

static void __mark_reg_not_init(const struct bpf_verifier_env *env,
				struct bpf_reg_state *reg);

/* Mark the unknown part of a register (variable offset or scalar value) as
 * known to have the value @imm.
 */
static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm)
{
	/* Clear id, off, and union(map_ptr, range) */
	memset(((u8 *)reg) + sizeof(reg->type), 0,
	       offsetof(struct bpf_reg_state, var_off) - sizeof(reg->type));
	reg->var_off = tnum_const(imm);
	reg->smin_value = (s64)imm;
	reg->smax_value = (s64)imm;
	reg->umin_value = imm;
	reg->umax_value = imm;
}
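
/* For instance, __mark_reg_known(reg, 42) leaves var_off as the constant tnum
 * {.value = 42, .mask = 0} and collapses all four bounds (smin/smax/umin/umax)
 * to 42; reg->type itself is left untouched, so the caller is expected to have
 * set it appropriately.
 */
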
/* Mark the 'variable offset' part of a register as zero. This should be
 * used only on registers holding a pointer type.
 */
static void __mark_reg_known_zero(struct bpf_reg_state *reg)
{
	__mark_reg_known(reg, 0);
}

static void __mark_reg_const_zero(struct bpf_reg_state *reg)
{
	__mark_reg_known(reg, 0);
	reg->type = SCALAR_VALUE;
}

static void mark_reg_known_zero(struct bpf_verifier_env *env,
				struct bpf_reg_state *regs, u32 regno)
{
	if (WARN_ON(regno >= MAX_BPF_REG)) {
		verbose(env, "mark_reg_known_zero(regs, %u)\n", regno);
		/* Something bad happened, let's kill all regs */
		for (regno = 0; regno < MAX_BPF_REG; regno++)
			__mark_reg_not_init(env, regs + regno);
		return;
	}
	__mark_reg_known_zero(regs + regno);
}

static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg)
{
	return type_is_pkt_pointer(reg->type);
}

static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg)
{
	return reg_is_pkt_pointer(reg) ||
	       reg->type == PTR_TO_PACKET_END;
}

/* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. */
static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg,
				    enum bpf_reg_type which)
{
	/* The register can already have a range from prior markings.
	 * This is fine as long as it hasn't been advanced from its
	 * origin.
	 */
	return reg->type == which &&
	       reg->id == 0 &&
	       reg->off == 0 &&
	       tnum_equals_const(reg->var_off, 0);
}

/* Attempts to improve min/max values based on var_off information */
static void __update_reg_bounds(struct bpf_reg_state *reg)
{
	/* min signed is max(sign bit) | min(other bits) */
	reg->smin_value = max_t(s64, reg->smin_value,
				reg->var_off.value | (reg->var_off.mask & S64_MIN));
	/* max signed is min(sign bit) | max(other bits) */
	reg->smax_value = min_t(s64, reg->smax_value,
				reg->var_off.value | (reg->var_off.mask & S64_MAX));
	reg->umin_value = max(reg->umin_value, reg->var_off.value);
	reg->umax_value = min(reg->umax_value,
			      reg->var_off.value | reg->var_off.mask);
}

/* Uses signed min/max values to inform unsigned, and vice-versa */
static void __reg_deduce_bounds(struct bpf_reg_state *reg)
{
	/* Learn sign from signed bounds.
	 * If we cannot cross the sign boundary, then signed and unsigned bounds
	 * are the same, so combine.  This works even in the negative case, e.g.
	 * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
	 */
	if (reg->smin_value >= 0 || reg->smax_value < 0) {
		reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
							  reg->umin_value);
		reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
							  reg->umax_value);
		return;
	}
	/* Learn sign from unsigned bounds.  Signed bounds cross the sign
	 * boundary, so we must be careful.
	 */
	if ((s64)reg->umax_value >= 0) {
		/* Positive.  We can't learn anything from the smin, but smax
		 * is positive, hence safe.
		 */
		reg->smin_value = reg->umin_value;
		reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
							  reg->umax_value);
	} else if ((s64)reg->umin_value < 0) {
		/* Negative.  We can't learn anything from the smax, but smin
		 * is negative, hence safe.
		 */
		reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
							  reg->umin_value);
		reg->smax_value = reg->umax_value;
	}
}
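
/* A small worked example of the two helpers above (values chosen only for
 * illustration): if var_off = {.value = 0x4, .mask = 0x3}, the register is
 * known to be 0b1xx, so __update_reg_bounds() can raise umin_value to 0x4
 * (var_off.value) and lower umax_value to 0x7 (value | mask). Conversely, if
 * the program has only proven -3 s<= x s<= -1, __reg_deduce_bounds() notes
 * that the range does not cross the sign boundary and therefore also implies
 * 0xfffffffffffffffd u<= x u<= 0xffffffffffffffff.
 */
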
/* Attempts to improve var_off based on unsigned min/max information */
static void __reg_bound_offset(struct bpf_reg_state *reg)
{
	reg->var_off = tnum_intersect(reg->var_off,
				      tnum_range(reg->umin_value,
						 reg->umax_value));
}

static void __reg_bound_offset32(struct bpf_reg_state *reg)
{
	u64 mask = 0xffffFFFF;
	struct tnum range = tnum_range(reg->umin_value & mask,
				       reg->umax_value & mask);
	struct tnum lo32 = tnum_cast(reg->var_off, 4);
	struct tnum hi32 = tnum_lshift(tnum_rshift(reg->var_off, 32), 32);

	reg->var_off = tnum_or(hi32, tnum_intersect(lo32, range));
}

/* Reset the min/max bounds of a register */
static void __mark_reg_unbounded(struct bpf_reg_state *reg)
{
	reg->smin_value = S64_MIN;
	reg->smax_value = S64_MAX;
	reg->umin_value = 0;
	reg->umax_value = U64_MAX;
}

/* Mark a register as having a completely unknown (scalar) value. */
static void __mark_reg_unknown(const struct bpf_verifier_env *env,
			       struct bpf_reg_state *reg)
{
	/*
	 * Clear type, id, off, and union(map_ptr, range) and
	 * padding between 'type' and union
	 */
	memset(reg, 0, offsetof(struct bpf_reg_state, var_off));
	reg->type = SCALAR_VALUE;
	reg->var_off = tnum_unknown;
	reg->frameno = 0;
	reg->precise = env->subprog_cnt > 1 || !env->allow_ptr_leaks ?
		       true : false;
	__mark_reg_unbounded(reg);
}

static void mark_reg_unknown(struct bpf_verifier_env *env,
			     struct bpf_reg_state *regs, u32 regno)
{
	if (WARN_ON(regno >= MAX_BPF_REG)) {
		verbose(env, "mark_reg_unknown(regs, %u)\n", regno);
		/* Something bad happened, let's kill all regs except FP */
		for (regno = 0; regno < BPF_REG_FP; regno++)
			__mark_reg_not_init(env, regs + regno);
		return;
	}
	__mark_reg_unknown(env, regs + regno);
}

static void __mark_reg_not_init(const struct bpf_verifier_env *env,
				struct bpf_reg_state *reg)
{
	__mark_reg_unknown(env, reg);
	reg->type = NOT_INIT;
}

static void mark_reg_not_init(struct bpf_verifier_env *env,
			      struct bpf_reg_state *regs, u32 regno)
{
	if (WARN_ON(regno >= MAX_BPF_REG)) {
		verbose(env, "mark_reg_not_init(regs, %u)\n", regno);
		/* Something bad happened, let's kill all regs except FP */
		for (regno = 0; regno < BPF_REG_FP; regno++)
			__mark_reg_not_init(env, regs + regno);
		return;
	}
	__mark_reg_not_init(env, regs + regno);
}

#define DEF_NOT_SUBREG	(0)
static void init_reg_state(struct bpf_verifier_env *env,
			   struct bpf_func_state *state)
{
	struct bpf_reg_state *regs = state->regs;
	int i;

	for (i = 0; i < MAX_BPF_REG; i++) {
		mark_reg_not_init(env, regs, i);
		regs[i].live = REG_LIVE_NONE;
		regs[i].parent = NULL;
		regs[i].subreg_def = DEF_NOT_SUBREG;
	}

	/* frame pointer */
	regs[BPF_REG_FP].type = PTR_TO_STACK;
	mark_reg_known_zero(env, regs, BPF_REG_FP);
	regs[BPF_REG_FP].frameno = state->frameno;
}

#define BPF_MAIN_FUNC (-1)
static void init_func_state(struct bpf_verifier_env *env,
			    struct bpf_func_state *state,
			    int callsite, int frameno, int subprogno)
{
	state->callsite = callsite;
	state->frameno = frameno;
	state->subprogno = subprogno;
	init_reg_state(env, state);
}

enum reg_arg_type {
	SRC_OP,		/* register is used as source operand */
	DST_OP,		/* register is used as destination operand */
	DST_OP_NO_MARK	/* same as above, check only, don't mark */
};

static int cmp_subprogs(const void *a, const void *b)
{
	return ((struct bpf_subprog_info *)a)->start -
	       ((struct bpf_subprog_info *)b)->start;
}

static int find_subprog(struct bpf_verifier_env *env, int off)
{
	struct bpf_subprog_info *p;

	p = bsearch(&off, env->subprog_info, env->subprog_cnt,
		    sizeof(env->subprog_info[0]), cmp_subprogs);
	if (!p)
		return -ENOENT;
	return p - env->subprog_info;

}

static int add_subprog(struct bpf_verifier_env *env, int off)
{
	int insn_cnt = env->prog->len;
	int ret;

	if (off >= insn_cnt || off < 0) {
		verbose(env, "call to invalid destination\n");
		return -EINVAL;
	}
	ret = find_subprog(env, off);
	if (ret >= 0)
		return 0;
	if (env->subprog_cnt >= BPF_MAX_SUBPROGS) {
		verbose(env, "too many subprograms\n");
		return -E2BIG;
	}
	env->subprog_info[env->subprog_cnt++].start = off;
	sort(env->subprog_info, env->subprog_cnt,
	     sizeof(env->subprog_info[0]), cmp_subprogs, NULL);
	return 0;
}
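
/* As a hypothetical example of what check_subprogs() below computes: for a
 * 10-insn program whose only bpf-to-bpf call sits at insn 3 with imm=2, the
 * callee starts at insn 3 + 2 + 1 = 6, so subprog_info[].start becomes
 * {0, 6} plus the fake 'exit' entry at insn_cnt = 10. Insns 0..5 then form
 * subprog 0, insns 6..9 form subprog 1, and every jump must stay within its
 * own subprog.
 */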
static int check_subprogs(struct bpf_verifier_env *env)
{
	int i, ret, subprog_start, subprog_end, off, cur_subprog = 0;
	struct bpf_subprog_info *subprog = env->subprog_info;
	struct bpf_insn *insn = env->prog->insnsi;
	int insn_cnt = env->prog->len;

	/* Add entry function. */
	ret = add_subprog(env, 0);
	if (ret < 0)
		return ret;

	/* determine subprog starts. The end is one before the next starts */
	for (i = 0; i < insn_cnt; i++) {
		if (insn[i].code != (BPF_JMP | BPF_CALL))
			continue;
		if (insn[i].src_reg != BPF_PSEUDO_CALL)
			continue;
		if (!env->allow_ptr_leaks) {
			verbose(env, "function calls to other bpf functions are allowed for root only\n");
			return -EPERM;
		}
		ret = add_subprog(env, i + insn[i].imm + 1);
		if (ret < 0)
			return ret;
	}

	/* Add a fake 'exit' subprog which could simplify subprog iteration
	 * logic. 'subprog_cnt' should not be increased.
	 */
	subprog[env->subprog_cnt].start = insn_cnt;

	if (env->log.level & BPF_LOG_LEVEL2)
		for (i = 0; i < env->subprog_cnt; i++)
			verbose(env, "func#%d @%d\n", i, subprog[i].start);

	/* now check that all jumps are within the same subprog */
	subprog_start = subprog[cur_subprog].start;
	subprog_end = subprog[cur_subprog + 1].start;
	for (i = 0; i < insn_cnt; i++) {
		u8 code = insn[i].code;

		if (BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32)
			goto next;
		if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL)
			goto next;
		off = i + insn[i].off + 1;
		if (off < subprog_start || off >= subprog_end) {
			verbose(env, "jump out of range from insn %d to %d\n", i, off);
			return -EINVAL;
		}
next:
		if (i == subprog_end - 1) {
			/* to avoid fall-through from one subprog into another
			 * the last insn of the subprog should be either exit
			 * or unconditional jump back
			 */
			if (code != (BPF_JMP | BPF_EXIT) &&
			    code != (BPF_JMP | BPF_JA)) {
				verbose(env, "last insn is not an exit or jmp\n");
				return -EINVAL;
			}
			subprog_start = subprog_end;
			cur_subprog++;
			if (cur_subprog < env->subprog_cnt)
				subprog_end = subprog[cur_subprog + 1].start;
		}
	}
	return 0;
}

/* Parentage chain of this register (or stack slot) should take care of all
 * issues like callee-saved registers, stack slot allocation time, etc.
 */
static int mark_reg_read(struct bpf_verifier_env *env,
			 const struct bpf_reg_state *state,
			 struct bpf_reg_state *parent, u8 flag)
{
	bool writes = parent == state->parent; /* Observe write marks */
	int cnt = 0;

	while (parent) {
		/* if read wasn't screened by an earlier write ... */
		if (writes && state->live & REG_LIVE_WRITTEN)
			break;
		if (parent->live & REG_LIVE_DONE) {
			verbose(env, "verifier BUG type %s var_off %lld off %d\n",
				reg_type_str[parent->type],
				parent->var_off.value, parent->off);
			return -EFAULT;
		}
		/* The first condition is more likely to be true than the
		 * second, so check it first.
		 */
		if ((parent->live & REG_LIVE_READ) == flag ||
		    parent->live & REG_LIVE_READ64)
			/* The parentage chain never changes and
			 * this parent was already marked as LIVE_READ.
			 * There is no need to keep walking the chain again and
			 * keep re-marking all parents as LIVE_READ.
			 * This case happens when the same register is read
			 * multiple times without writes into it in-between.
			 * Also, if parent has the stronger REG_LIVE_READ64 set,
			 * then no need to set the weak REG_LIVE_READ32.
			 */
			break;
		/* ... then we depend on parent's value */
		parent->live |= flag;
		/* REG_LIVE_READ64 overrides REG_LIVE_READ32. */
		if (flag == REG_LIVE_READ64)
			parent->live &= ~REG_LIVE_READ32;
		state = parent;
		parent = state->parent;
		writes = true;
		cnt++;
	}

	if (env->longest_mark_read_walk < cnt)
		env->longest_mark_read_walk = cnt;
	return 0;
}
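
/* In other words (a simplified illustration): when the current state reads r6,
 * the read mark is propagated up through r6's parent states until it reaches a
 * state in which r6 carries REG_LIVE_WRITTEN (the value the read saw was
 * produced there) or a parent that is already marked as read. Registers that
 * never pick up a read mark in an old state can be ignored when that state is
 * later compared against new ones for pruning.
 */
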
/* This function is supposed to be used by the following 32-bit optimization
 * code only. It returns TRUE if the source or destination register operates
 * on 64-bit, otherwise return FALSE.
 */
static bool is_reg64(struct bpf_verifier_env *env, struct bpf_insn *insn,
		     u32 regno, struct bpf_reg_state *reg, enum reg_arg_type t)
{
	u8 code, class, op;

	code = insn->code;
	class = BPF_CLASS(code);
	op = BPF_OP(code);
	if (class == BPF_JMP) {
		/* BPF_EXIT for "main" will reach here. Return TRUE
		 * conservatively.
		 */
		if (op == BPF_EXIT)
			return true;
		if (op == BPF_CALL) {
			/* BPF to BPF call will reach here because of marking
			 * caller saved clobber with DST_OP_NO_MARK for which we
			 * don't care about the register def because they are
			 * anyway marked as NOT_INIT already.
			 */
			if (insn->src_reg == BPF_PSEUDO_CALL)
				return false;
			/* Helper call will reach here because of arg type
			 * check, conservatively return TRUE.
			 */
			if (t == SRC_OP)
				return true;

			return false;
		}
	}

	if (class == BPF_ALU64 || class == BPF_JMP ||
	    /* BPF_END always use BPF_ALU class. */
	    (class == BPF_ALU && op == BPF_END && insn->imm == 64))
		return true;

	if (class == BPF_ALU || class == BPF_JMP32)
		return false;

	if (class == BPF_LDX) {
		if (t != SRC_OP)
			return BPF_SIZE(code) == BPF_DW;
		/* LDX source must be ptr. */
		return true;
	}

	if (class == BPF_STX) {
		if (reg->type != SCALAR_VALUE)
			return true;
		return BPF_SIZE(code) == BPF_DW;
	}

	if (class == BPF_LD) {
		u8 mode = BPF_MODE(code);

		/* LD_IMM64 */
		if (mode == BPF_IMM)
			return true;

		/* Both LD_IND and LD_ABS return 32-bit data. */
		if (t != SRC_OP)
			return false;

		/* Implicit ctx ptr. */
		if (regno == BPF_REG_6)
			return true;

		/* Explicit source could be any width. */
		return true;
	}

	if (class == BPF_ST)
		/* The only source register for BPF_ST is a ptr. */
		return true;

	/* Conservatively return true at default. */
	return true;
}

/* Return TRUE if INSN doesn't have explicit value define. */
static bool insn_no_def(struct bpf_insn *insn)
{
	u8 class = BPF_CLASS(insn->code);

	return (class == BPF_JMP || class == BPF_JMP32 ||
		class == BPF_STX || class == BPF_ST);
}

/* Return TRUE if INSN has defined any 32-bit value explicitly. */
static bool insn_has_def32(struct bpf_verifier_env *env, struct bpf_insn *insn)
{
	if (insn_no_def(insn))
		return false;

	return !is_reg64(env, insn, insn->dst_reg, NULL, DST_OP);
}

static void mark_insn_zext(struct bpf_verifier_env *env,
			   struct bpf_reg_state *reg)
{
	s32 def_idx = reg->subreg_def;

	if (def_idx == DEF_NOT_SUBREG)
		return;

	env->insn_aux_data[def_idx - 1].zext_dst = true;
	/* The dst will be zero extended, so won't be sub-register anymore. */
	reg->subreg_def = DEF_NOT_SUBREG;
}

static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
			 enum reg_arg_type t)
{
	struct bpf_verifier_state *vstate = env->cur_state;
	struct bpf_func_state *state = vstate->frame[vstate->curframe];
	struct bpf_insn *insn = env->prog->insnsi + env->insn_idx;
	struct bpf_reg_state *reg, *regs = state->regs;
	bool rw64;

	if (regno >= MAX_BPF_REG) {
		verbose(env, "R%d is invalid\n", regno);
		return -EINVAL;
	}

	reg = &regs[regno];
	rw64 = is_reg64(env, insn, regno, reg, t);
	if (t == SRC_OP) {
		/* check whether register used as source operand can be read */
		if (reg->type == NOT_INIT) {
			verbose(env, "R%d !read_ok\n", regno);
			return -EACCES;
		}
		/* We don't need to worry about FP liveness because it's read-only */
		if (regno == BPF_REG_FP)
			return 0;

		if (rw64)
			mark_insn_zext(env, reg);

		return mark_reg_read(env, reg, reg->parent,
				     rw64 ? REG_LIVE_READ64 : REG_LIVE_READ32);
	} else {
		/* check whether register used as dest operand can be written to */
		if (regno == BPF_REG_FP) {
			verbose(env, "frame pointer is read only\n");
			return -EACCES;
		}
		reg->live |= REG_LIVE_WRITTEN;
		reg->subreg_def = rw64 ? DEF_NOT_SUBREG : env->insn_idx + 1;
		if (t == DST_OP)
			mark_reg_unknown(env, regs, regno);
	}
	return 0;
}

/* for any branch, call, exit record the history of jmps in the given state */
static int push_jmp_history(struct bpf_verifier_env *env,
			    struct bpf_verifier_state *cur)
{
	u32 cnt = cur->jmp_history_cnt;
	struct bpf_idx_pair *p;

	cnt++;
	p = krealloc(cur->jmp_history, cnt * sizeof(*p), GFP_USER);
	if (!p)
		return -ENOMEM;
	p[cnt - 1].idx = env->insn_idx;
	p[cnt - 1].prev_idx = env->prev_insn_idx;
	cur->jmp_history = p;
	cur->jmp_history_cnt = cnt;
	return 0;
}

/* Backtrack one insn at a time. If idx is not at the top of recorded
 * history then previous instruction came from straight line execution.
 */
static int get_prev_insn_idx(struct bpf_verifier_state *st, int i,
			     u32 *history)
{
	u32 cnt = *history;

	if (cnt && st->jmp_history[cnt - 1].idx == i) {
		i = st->jmp_history[cnt - 1].prev_idx;
		(*history)--;
	} else {
		i--;
	}
	return i;
}
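
/* To make the masks used below concrete, an illustrative walk (not verifier
 * output): suppose precision is requested for r5 right after
 *
 *	r4 = *(u64 *)(r10 - 8)
 *	r5 += r4
 *
 * Backtracking starts with reg_mask = 1 << 5. Processing 'r5 += r4' adds
 * bit 4, and processing the fill from fp-8 clears bit 4 but sets bit 0 of
 * stack_mask (spi 0 corresponds to fp-8, spi 1 to fp-16, and so on), so the
 * spill that produced fp-8 will be marked precise as well.
 */
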
/* For given verifier state backtrack_insn() is called from the last insn to
 * the first insn. Its purpose is to compute a bitmask of registers and
 * stack slots that needs precision in the parent verifier state.
 */
static int backtrack_insn(struct bpf_verifier_env *env, int idx,
			  u32 *reg_mask, u64 *stack_mask)
{
	const struct bpf_insn_cbs cbs = {
		.cb_print	= verbose,
		.private_data	= env,
	};
	struct bpf_insn *insn = env->prog->insnsi + idx;
	u8 class = BPF_CLASS(insn->code);
	u8 opcode = BPF_OP(insn->code);
	u8 mode = BPF_MODE(insn->code);
	u32 dreg = 1u << insn->dst_reg;
	u32 sreg = 1u << insn->src_reg;
	u32 spi;

	if (insn->code == 0)
		return 0;
	if (env->log.level & BPF_LOG_LEVEL) {
		verbose(env, "regs=%x stack=%llx before ", *reg_mask, *stack_mask);
		verbose(env, "%d: ", idx);
		print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
	}

	if (class == BPF_ALU || class == BPF_ALU64) {
		if (!(*reg_mask & dreg))
			return 0;
		if (opcode == BPF_MOV) {
			if (BPF_SRC(insn->code) == BPF_X) {
				/* dreg = sreg
				 * dreg needs precision after this insn
				 * sreg needs precision before this insn
				 */
				*reg_mask &= ~dreg;
				*reg_mask |= sreg;
			} else {
				/* dreg = K
				 * dreg needs precision after this insn.
				 * Corresponding register is already marked
				 * as precise=true in this verifier state.
				 * No further markings in parent are necessary
				 */
				*reg_mask &= ~dreg;
			}
		} else {
			if (BPF_SRC(insn->code) == BPF_X) {
				/* dreg += sreg
				 * both dreg and sreg need precision
				 * before this insn
				 */
				*reg_mask |= sreg;
			} /* else dreg += K
			   * dreg still needs precision before this insn
			   */
		}
	} else if (class == BPF_LDX) {
		if (!(*reg_mask & dreg))
			return 0;
		*reg_mask &= ~dreg;

		/* scalars can only be spilled into stack w/o losing precision.
		 * Load from any other memory can be zero extended.
		 * The desire to keep that precision is already indicated
		 * by 'precise' mark in corresponding register of this state.
		 * No further tracking necessary.
		 */
		if (insn->src_reg != BPF_REG_FP)
			return 0;
		if (BPF_SIZE(insn->code) != BPF_DW)
			return 0;

		/* dreg = *(u64 *)[fp - off] was a fill from the stack.
		 * that [fp - off] slot contains scalar that needs to be
		 * tracked with precision
		 */
		spi = (-insn->off - 1) / BPF_REG_SIZE;
		if (spi >= 64) {
			verbose(env, "BUG spi %d\n", spi);
			WARN_ONCE(1, "verifier backtracking bug");
			return -EFAULT;
		}
		*stack_mask |= 1ull << spi;
	} else if (class == BPF_STX || class == BPF_ST) {
		if (*reg_mask & dreg)
			/* stx & st shouldn't be using _scalar_ dst_reg
			 * to access memory. It means backtracking
			 * encountered a case of pointer subtraction.
			 */
			return -ENOTSUPP;
		/* scalars can only be spilled into stack */
		if (insn->dst_reg != BPF_REG_FP)
			return 0;
		if (BPF_SIZE(insn->code) != BPF_DW)
			return 0;
		spi = (-insn->off - 1) / BPF_REG_SIZE;
		if (spi >= 64) {
			verbose(env, "BUG spi %d\n", spi);
			WARN_ONCE(1, "verifier backtracking bug");
			return -EFAULT;
		}
		if (!(*stack_mask & (1ull << spi)))
			return 0;
		*stack_mask &= ~(1ull << spi);
		if (class == BPF_STX)
			*reg_mask |= sreg;
	} else if (class == BPF_JMP || class == BPF_JMP32) {
		if (opcode == BPF_CALL) {
			if (insn->src_reg == BPF_PSEUDO_CALL)
				return -ENOTSUPP;
			/* regular helper call sets R0 */
			*reg_mask &= ~1;
			if (*reg_mask & 0x3f) {
				/* if backtracking was looking for registers R1-R5
				 * they should have been found already.
				 */
				verbose(env, "BUG regs %x\n", *reg_mask);
				WARN_ONCE(1, "verifier backtracking bug");
				return -EFAULT;
			}
		} else if (opcode == BPF_EXIT) {
			return -ENOTSUPP;
		}
	} else if (class == BPF_LD) {
		if (!(*reg_mask & dreg))
			return 0;
		*reg_mask &= ~dreg;
		/* It's ld_imm64 or ld_abs or ld_ind.
		 * For ld_imm64 no further tracking of precision
		 * into parent is necessary
		 */
		if (mode == BPF_IND || mode == BPF_ABS)
			/* to be analyzed */
			return -ENOTSUPP;
	}
	return 0;
}

/* the scalar precision tracking algorithm:
 * . at the start all registers have precise=false.
 * . scalar ranges are tracked as normal through alu and jmp insns.
 * . once precise value of the scalar register is used in:
 *   .  ptr + scalar alu
 *   . if (scalar cond K|scalar)
 *   .  helper_call(.., scalar, ...) where ARG_CONST is expected
 *   backtrack through the verifier states and mark all registers and
 *   stack slots with spilled constants that these scalar registers
 *   should be precise.
 * . during state pruning two registers (or spilled stack slots)
 *   are equivalent if both are not precise.
 *
 * Note the verifier cannot simply walk register parentage chain,
 * since many different registers and stack slots could have been
 * used to compute single precise scalar.
 *
 * The approach of starting with precise=true for all registers and then
 * backtrack to mark a register as not precise when the verifier detects
 * that program doesn't care about specific value (e.g., when helper
 * takes register as ARG_ANYTHING parameter) is not safe.
 *
 * It's ok to walk single parentage chain of the verifier states.
 * It's possible that this backtracking will go all the way till 1st insn.
 * All other branches will be explored for needing precision later.
 *
 * The backtracking needs to deal with cases like:
 *   R8=map_value(id=0,off=0,ks=4,vs=1952,imm=0) R9_w=map_value(id=0,off=40,ks=4,vs=1952,imm=0)
 *   r9 -= r8
 *   r5 = r9
 *   if r5 > 0x79f goto pc+7
 *    R5_w=inv(id=0,umax_value=1951,var_off=(0x0; 0x7ff))
 *   r5 += 1
 *   ...
 *   call bpf_perf_event_output#25
 *     where .arg5_type = ARG_CONST_SIZE_OR_ZERO
 *
 * and this case:
 *   r6 = 1
 *   call foo // uses callee's r6 inside to compute r0
 *   r0 += r6
 *   if r0 == 0 goto
 *
 * to track above reg_mask/stack_mask needs to be independent for each frame.
 *
 * Also if parent's curframe > frame where backtracking started,
 * the verifier needs to mark registers in both frames, otherwise callees
 * may incorrectly prune callers. This is similar to
 * commit 7640ead93924 ("bpf: verifier: make sure callees don't prune with caller differences")
 *
 * For now backtracking falls back into conservative marking.
 */
static void mark_all_scalars_precise(struct bpf_verifier_env *env,
				     struct bpf_verifier_state *st)
{
	struct bpf_func_state *func;
	struct bpf_reg_state *reg;
	int i, j;

	/* big hammer: mark all scalars precise in this path.
	 * pop_stack may still get !precise scalars.
	 */
	for (; st; st = st->parent)
		for (i = 0; i <= st->curframe; i++) {
			func = st->frame[i];
			for (j = 0; j < BPF_REG_FP; j++) {
				reg = &func->regs[j];
				if (reg->type != SCALAR_VALUE)
					continue;
				reg->precise = true;
			}
			for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
				if (func->stack[j].slot_type[0] != STACK_SPILL)
					continue;
				reg = &func->stack[j].spilled_ptr;
				if (reg->type != SCALAR_VALUE)
					continue;
				reg->precise = true;
			}
		}
}

static int __mark_chain_precision(struct bpf_verifier_env *env, int regno,
				  int spi)
{
	struct bpf_verifier_state *st = env->cur_state;
	int first_idx = st->first_insn_idx;
	int last_idx = env->insn_idx;
	struct bpf_func_state *func;
	struct bpf_reg_state *reg;
	u32 reg_mask = regno >= 0 ? 1u << regno : 0;
	u64 stack_mask = spi >= 0 ? 1ull << spi : 0;
	bool skip_first = true;
	bool new_marks = false;
	int i, err;

	if (!env->allow_ptr_leaks)
		/* backtracking is root only for now */
		return 0;

	func = st->frame[st->curframe];
	if (regno >= 0) {
		reg = &func->regs[regno];
		if (reg->type != SCALAR_VALUE) {
			WARN_ONCE(1, "backtracking misuse");
			return -EFAULT;
		}
		if (!reg->precise)
			new_marks = true;
		else
			reg_mask = 0;
		reg->precise = true;
	}

	while (spi >= 0) {
		if (func->stack[spi].slot_type[0] != STACK_SPILL) {
			stack_mask = 0;
			break;
		}
		reg = &func->stack[spi].spilled_ptr;
		if (reg->type != SCALAR_VALUE) {
			stack_mask = 0;
			break;
		}
		if (!reg->precise)
			new_marks = true;
		else
			stack_mask = 0;
		reg->precise = true;
		break;
	}

	if (!new_marks)
		return 0;
	if (!reg_mask && !stack_mask)
		return 0;
	for (;;) {
		DECLARE_BITMAP(mask, 64);
		u32 history = st->jmp_history_cnt;

		if (env->log.level & BPF_LOG_LEVEL)
			verbose(env, "last_idx %d first_idx %d\n", last_idx, first_idx);
		for (i = last_idx;;) {
			if (skip_first) {
				err = 0;
				skip_first = false;
			} else {
				err = backtrack_insn(env, i, &reg_mask, &stack_mask);
			}
			if (err == -ENOTSUPP) {
				mark_all_scalars_precise(env, st);
				return 0;
			} else if (err) {
				return err;
			}
			if (!reg_mask && !stack_mask)
				/* Found assignment(s) into tracked register in this state.
				 * Since this state is already marked, just return.
				 * Nothing to be tracked further in the parent state.
1800 */ 1801 return 0; 1802 if (i == first_idx) 1803 break; 1804 i = get_prev_insn_idx(st, i, &history); 1805 if (i >= env->prog->len) { 1806 /* This can happen if backtracking reached insn 0 1807 * and there are still reg_mask or stack_mask 1808 * to backtrack. 1809 * It means the backtracking missed the spot where 1810 * particular register was initialized with a constant. 1811 */ 1812 verbose(env, "BUG backtracking idx %d\n", i); 1813 WARN_ONCE(1, "verifier backtracking bug"); 1814 return -EFAULT; 1815 } 1816 } 1817 st = st->parent; 1818 if (!st) 1819 break; 1820 1821 new_marks = false; 1822 func = st->frame[st->curframe]; 1823 bitmap_from_u64(mask, reg_mask); 1824 for_each_set_bit(i, mask, 32) { 1825 reg = &func->regs[i]; 1826 if (reg->type != SCALAR_VALUE) { 1827 reg_mask &= ~(1u << i); 1828 continue; 1829 } 1830 if (!reg->precise) 1831 new_marks = true; 1832 reg->precise = true; 1833 } 1834 1835 bitmap_from_u64(mask, stack_mask); 1836 for_each_set_bit(i, mask, 64) { 1837 if (i >= func->allocated_stack / BPF_REG_SIZE) { 1838 /* the sequence of instructions: 1839 * 2: (bf) r3 = r10 1840 * 3: (7b) *(u64 *)(r3 -8) = r0 1841 * 4: (79) r4 = *(u64 *)(r10 -8) 1842 * doesn't contain jmps. It's backtracked 1843 * as a single block. 1844 * During backtracking insn 3 is not recognized as 1845 * stack access, so at the end of backtracking 1846 * stack slot fp-8 is still marked in stack_mask. 1847 * However the parent state may not have accessed 1848 * fp-8 and it's "unallocated" stack space. 1849 * In such case fallback to conservative. 1850 */ 1851 mark_all_scalars_precise(env, st); 1852 return 0; 1853 } 1854 1855 if (func->stack[i].slot_type[0] != STACK_SPILL) { 1856 stack_mask &= ~(1ull << i); 1857 continue; 1858 } 1859 reg = &func->stack[i].spilled_ptr; 1860 if (reg->type != SCALAR_VALUE) { 1861 stack_mask &= ~(1ull << i); 1862 continue; 1863 } 1864 if (!reg->precise) 1865 new_marks = true; 1866 reg->precise = true; 1867 } 1868 if (env->log.level & BPF_LOG_LEVEL) { 1869 print_verifier_state(env, func); 1870 verbose(env, "parent %s regs=%x stack=%llx marks\n", 1871 new_marks ? "didn't have" : "already had", 1872 reg_mask, stack_mask); 1873 } 1874 1875 if (!reg_mask && !stack_mask) 1876 break; 1877 if (!new_marks) 1878 break; 1879 1880 last_idx = st->last_insn_idx; 1881 first_idx = st->first_insn_idx; 1882 } 1883 return 0; 1884 } 1885 1886 static int mark_chain_precision(struct bpf_verifier_env *env, int regno) 1887 { 1888 return __mark_chain_precision(env, regno, -1); 1889 } 1890 1891 static int mark_chain_precision_stack(struct bpf_verifier_env *env, int spi) 1892 { 1893 return __mark_chain_precision(env, -1, spi); 1894 } 1895 1896 static bool is_spillable_regtype(enum bpf_reg_type type) 1897 { 1898 switch (type) { 1899 case PTR_TO_MAP_VALUE: 1900 case PTR_TO_MAP_VALUE_OR_NULL: 1901 case PTR_TO_STACK: 1902 case PTR_TO_CTX: 1903 case PTR_TO_PACKET: 1904 case PTR_TO_PACKET_META: 1905 case PTR_TO_PACKET_END: 1906 case PTR_TO_FLOW_KEYS: 1907 case CONST_PTR_TO_MAP: 1908 case PTR_TO_SOCKET: 1909 case PTR_TO_SOCKET_OR_NULL: 1910 case PTR_TO_SOCK_COMMON: 1911 case PTR_TO_SOCK_COMMON_OR_NULL: 1912 case PTR_TO_TCP_SOCK: 1913 case PTR_TO_TCP_SOCK_OR_NULL: 1914 case PTR_TO_XDP_SOCK: 1915 case PTR_TO_BTF_ID: 1916 return true; 1917 default: 1918 return false; 1919 } 1920 } 1921 1922 /* Does this register contain a constant zero? 
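 * (i.e. a SCALAR_VALUE whose tnum is the constant 0, e.g. after
 * 'r1 = 0' or after filling a register from all-STACK_ZERO slots.)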
*/
1923 static bool register_is_null(struct bpf_reg_state *reg)
1924 {
1925 return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0);
1926 }
1927 
1928 static bool register_is_const(struct bpf_reg_state *reg)
1929 {
1930 return reg->type == SCALAR_VALUE && tnum_is_const(reg->var_off);
1931 }
1932 
1933 static void save_register_state(struct bpf_func_state *state,
1934 int spi, struct bpf_reg_state *reg)
1935 {
1936 int i;
1937 
1938 state->stack[spi].spilled_ptr = *reg;
1939 state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
1940 
1941 for (i = 0; i < BPF_REG_SIZE; i++)
1942 state->stack[spi].slot_type[i] = STACK_SPILL;
1943 }
1944 
1945 /* check_stack_read/write functions track spill/fill of registers;
1946 * stack boundary and alignment are checked in check_mem_access()
1947 */
1948 static int check_stack_write(struct bpf_verifier_env *env,
1949 struct bpf_func_state *state, /* func where register points to */
1950 int off, int size, int value_regno, int insn_idx)
1951 {
1952 struct bpf_func_state *cur; /* state of the current function */
1953 int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
1954 u32 dst_reg = env->prog->insnsi[insn_idx].dst_reg;
1955 struct bpf_reg_state *reg = NULL;
1956 
1957 err = realloc_func_state(state, round_up(slot + 1, BPF_REG_SIZE),
1958 state->acquired_refs, true);
1959 if (err)
1960 return err;
1961 /* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
1962 * so it's aligned access and [off, off + size) are within stack limits
1963 */
1964 if (!env->allow_ptr_leaks &&
1965 state->stack[spi].slot_type[0] == STACK_SPILL &&
1966 size != BPF_REG_SIZE) {
1967 verbose(env, "attempt to corrupt spilled pointer on stack\n");
1968 return -EACCES;
1969 }
1970 
1971 cur = env->cur_state->frame[env->cur_state->curframe];
1972 if (value_regno >= 0)
1973 reg = &cur->regs[value_regno];
1974 
1975 if (reg && size == BPF_REG_SIZE && register_is_const(reg) &&
1976 !register_is_null(reg) && env->allow_ptr_leaks) {
1977 if (dst_reg != BPF_REG_FP) {
1978 /* The backtracking logic can only recognize explicit
1979 * stack slot address like [fp - 8]. Any other spill of a
1980 * scalar via a different register has to be conservative.
1981 * Backtrack from here and mark all registers as precise
1982 * that contributed into 'reg' being a constant.
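 * For example (a sketch): a spill like *(u64 *)(r10 - 8) = r6 can be
 * tracked via spi, while r3 = r10; *(u64 *)(r3 - 8) = r6 cannot, which
 * is why the constant is marked precise here, before the spill.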
1983 */ 1984 err = mark_chain_precision(env, value_regno); 1985 if (err) 1986 return err; 1987 } 1988 save_register_state(state, spi, reg); 1989 } else if (reg && is_spillable_regtype(reg->type)) { 1990 /* register containing pointer is being spilled into stack */ 1991 if (size != BPF_REG_SIZE) { 1992 verbose_linfo(env, insn_idx, "; "); 1993 verbose(env, "invalid size of register spill\n"); 1994 return -EACCES; 1995 } 1996 1997 if (state != cur && reg->type == PTR_TO_STACK) { 1998 verbose(env, "cannot spill pointers to stack into stack frame of the caller\n"); 1999 return -EINVAL; 2000 } 2001 2002 if (!env->allow_ptr_leaks) { 2003 bool sanitize = false; 2004 2005 if (state->stack[spi].slot_type[0] == STACK_SPILL && 2006 register_is_const(&state->stack[spi].spilled_ptr)) 2007 sanitize = true; 2008 for (i = 0; i < BPF_REG_SIZE; i++) 2009 if (state->stack[spi].slot_type[i] == STACK_MISC) { 2010 sanitize = true; 2011 break; 2012 } 2013 if (sanitize) { 2014 int *poff = &env->insn_aux_data[insn_idx].sanitize_stack_off; 2015 int soff = (-spi - 1) * BPF_REG_SIZE; 2016 2017 /* detected reuse of integer stack slot with a pointer 2018 * which means either llvm is reusing stack slot or 2019 * an attacker is trying to exploit CVE-2018-3639 2020 * (speculative store bypass) 2021 * Have to sanitize that slot with preemptive 2022 * store of zero. 2023 */ 2024 if (*poff && *poff != soff) { 2025 /* disallow programs where single insn stores 2026 * into two different stack slots, since verifier 2027 * cannot sanitize them 2028 */ 2029 verbose(env, 2030 "insn %d cannot access two stack slots fp%d and fp%d", 2031 insn_idx, *poff, soff); 2032 return -EINVAL; 2033 } 2034 *poff = soff; 2035 } 2036 } 2037 save_register_state(state, spi, reg); 2038 } else { 2039 u8 type = STACK_MISC; 2040 2041 /* regular write of data into stack destroys any spilled ptr */ 2042 state->stack[spi].spilled_ptr.type = NOT_INIT; 2043 /* Mark slots as STACK_MISC if they belonged to spilled ptr. */ 2044 if (state->stack[spi].slot_type[0] == STACK_SPILL) 2045 for (i = 0; i < BPF_REG_SIZE; i++) 2046 state->stack[spi].slot_type[i] = STACK_MISC; 2047 2048 /* only mark the slot as written if all 8 bytes were written 2049 * otherwise read propagation may incorrectly stop too soon 2050 * when stack slots are partially written. 2051 * This heuristic means that read propagation will be 2052 * conservative, since it will add reg_live_read marks 2053 * to stack slots all the way to first state when programs 2054 * writes+reads less than 8 bytes 2055 */ 2056 if (size == BPF_REG_SIZE) 2057 state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN; 2058 2059 /* when we zero initialize stack slots mark them as such */ 2060 if (reg && register_is_null(reg)) { 2061 /* backtracking doesn't work for STACK_ZERO yet. */ 2062 err = mark_chain_precision(env, value_regno); 2063 if (err) 2064 return err; 2065 type = STACK_ZERO; 2066 } 2067 2068 /* Mark slots affected by this stack write. 
*/ 2069 for (i = 0; i < size; i++) 2070 state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] = 2071 type; 2072 } 2073 return 0; 2074 } 2075 2076 static int check_stack_read(struct bpf_verifier_env *env, 2077 struct bpf_func_state *reg_state /* func where register points to */, 2078 int off, int size, int value_regno) 2079 { 2080 struct bpf_verifier_state *vstate = env->cur_state; 2081 struct bpf_func_state *state = vstate->frame[vstate->curframe]; 2082 int i, slot = -off - 1, spi = slot / BPF_REG_SIZE; 2083 struct bpf_reg_state *reg; 2084 u8 *stype; 2085 2086 if (reg_state->allocated_stack <= slot) { 2087 verbose(env, "invalid read from stack off %d+0 size %d\n", 2088 off, size); 2089 return -EACCES; 2090 } 2091 stype = reg_state->stack[spi].slot_type; 2092 reg = ®_state->stack[spi].spilled_ptr; 2093 2094 if (stype[0] == STACK_SPILL) { 2095 if (size != BPF_REG_SIZE) { 2096 if (reg->type != SCALAR_VALUE) { 2097 verbose_linfo(env, env->insn_idx, "; "); 2098 verbose(env, "invalid size of register fill\n"); 2099 return -EACCES; 2100 } 2101 if (value_regno >= 0) { 2102 mark_reg_unknown(env, state->regs, value_regno); 2103 state->regs[value_regno].live |= REG_LIVE_WRITTEN; 2104 } 2105 mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64); 2106 return 0; 2107 } 2108 for (i = 1; i < BPF_REG_SIZE; i++) { 2109 if (stype[(slot - i) % BPF_REG_SIZE] != STACK_SPILL) { 2110 verbose(env, "corrupted spill memory\n"); 2111 return -EACCES; 2112 } 2113 } 2114 2115 if (value_regno >= 0) { 2116 /* restore register state from stack */ 2117 state->regs[value_regno] = *reg; 2118 /* mark reg as written since spilled pointer state likely 2119 * has its liveness marks cleared by is_state_visited() 2120 * which resets stack/reg liveness for state transitions 2121 */ 2122 state->regs[value_regno].live |= REG_LIVE_WRITTEN; 2123 } 2124 mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64); 2125 } else { 2126 int zeros = 0; 2127 2128 for (i = 0; i < size; i++) { 2129 if (stype[(slot - i) % BPF_REG_SIZE] == STACK_MISC) 2130 continue; 2131 if (stype[(slot - i) % BPF_REG_SIZE] == STACK_ZERO) { 2132 zeros++; 2133 continue; 2134 } 2135 verbose(env, "invalid read from stack off %d+%d size %d\n", 2136 off, i, size); 2137 return -EACCES; 2138 } 2139 mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64); 2140 if (value_regno >= 0) { 2141 if (zeros == size) { 2142 /* any size read into register is zero extended, 2143 * so the whole register == const_zero 2144 */ 2145 __mark_reg_const_zero(&state->regs[value_regno]); 2146 /* backtracking doesn't support STACK_ZERO yet, 2147 * so mark it precise here, so that later 2148 * backtracking can stop here. 2149 * Backtracking may not need this if this register 2150 * doesn't participate in pointer adjustment. 2151 * Forward propagation of precise flag is not 2152 * necessary either. This mark is only to stop 2153 * backtracking. Any register that contributed 2154 * to const 0 was marked precise before spill. 2155 */ 2156 state->regs[value_regno].precise = true; 2157 } else { 2158 /* have read misc data from the stack */ 2159 mark_reg_unknown(env, state->regs, value_regno); 2160 } 2161 state->regs[value_regno].live |= REG_LIVE_WRITTEN; 2162 } 2163 } 2164 return 0; 2165 } 2166 2167 static int check_stack_access(struct bpf_verifier_env *env, 2168 const struct bpf_reg_state *reg, 2169 int off, int size) 2170 { 2171 /* Stack accesses must be at a fixed offset, so that we 2172 * can determine what type of data were returned. See 2173 * check_stack_read(). 
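 * For example (a sketch): r1 = r10; r1 += r4; r0 = *(u64 *)(r1 - 8)
 * with an unknown scalar in r4 leaves r1 with a non-constant var_off,
 * so the load is rejected below as a variable stack access.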
2174 */ 2175 if (!tnum_is_const(reg->var_off)) { 2176 char tn_buf[48]; 2177 2178 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); 2179 verbose(env, "variable stack access var_off=%s off=%d size=%d\n", 2180 tn_buf, off, size); 2181 return -EACCES; 2182 } 2183 2184 if (off >= 0 || off < -MAX_BPF_STACK) { 2185 verbose(env, "invalid stack off=%d size=%d\n", off, size); 2186 return -EACCES; 2187 } 2188 2189 return 0; 2190 } 2191 2192 static int check_map_access_type(struct bpf_verifier_env *env, u32 regno, 2193 int off, int size, enum bpf_access_type type) 2194 { 2195 struct bpf_reg_state *regs = cur_regs(env); 2196 struct bpf_map *map = regs[regno].map_ptr; 2197 u32 cap = bpf_map_flags_to_cap(map); 2198 2199 if (type == BPF_WRITE && !(cap & BPF_MAP_CAN_WRITE)) { 2200 verbose(env, "write into map forbidden, value_size=%d off=%d size=%d\n", 2201 map->value_size, off, size); 2202 return -EACCES; 2203 } 2204 2205 if (type == BPF_READ && !(cap & BPF_MAP_CAN_READ)) { 2206 verbose(env, "read from map forbidden, value_size=%d off=%d size=%d\n", 2207 map->value_size, off, size); 2208 return -EACCES; 2209 } 2210 2211 return 0; 2212 } 2213 2214 /* check read/write into map element returned by bpf_map_lookup_elem() */ 2215 static int __check_map_access(struct bpf_verifier_env *env, u32 regno, int off, 2216 int size, bool zero_size_allowed) 2217 { 2218 struct bpf_reg_state *regs = cur_regs(env); 2219 struct bpf_map *map = regs[regno].map_ptr; 2220 2221 if (off < 0 || size < 0 || (size == 0 && !zero_size_allowed) || 2222 off + size > map->value_size) { 2223 verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n", 2224 map->value_size, off, size); 2225 return -EACCES; 2226 } 2227 return 0; 2228 } 2229 2230 /* check read/write into a map element with possible variable offset */ 2231 static int check_map_access(struct bpf_verifier_env *env, u32 regno, 2232 int off, int size, bool zero_size_allowed) 2233 { 2234 struct bpf_verifier_state *vstate = env->cur_state; 2235 struct bpf_func_state *state = vstate->frame[vstate->curframe]; 2236 struct bpf_reg_state *reg = &state->regs[regno]; 2237 int err; 2238 2239 /* We may have adjusted the register to this map value, so we 2240 * need to try adding each of min_value and max_value to off 2241 * to make sure our theoretical access will be safe. 2242 */ 2243 if (env->log.level & BPF_LOG_LEVEL) 2244 print_verifier_state(env, state); 2245 2246 /* The minimum value is only important with signed 2247 * comparisons where we can't assume the floor of a 2248 * value is 0. If we are using signed variables for our 2249 * index'es we need to make sure that whatever we use 2250 * will have a set floor within our range. 2251 */ 2252 if (reg->smin_value < 0 && 2253 (reg->smin_value == S64_MIN || 2254 (off + reg->smin_value != (s64)(s32)(off + reg->smin_value)) || 2255 reg->smin_value + off < 0)) { 2256 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n", 2257 regno); 2258 return -EACCES; 2259 } 2260 err = __check_map_access(env, regno, reg->smin_value + off, size, 2261 zero_size_allowed); 2262 if (err) { 2263 verbose(env, "R%d min value is outside of the array range\n", 2264 regno); 2265 return err; 2266 } 2267 2268 /* If we haven't set a max value then we need to bail since we can't be 2269 * sure we won't do bad things. 2270 * If reg->umax_value + off could overflow, treat that as unbounded too. 
*/
2272 if (reg->umax_value >= BPF_MAX_VAR_OFF) {
2273 verbose(env, "R%d unbounded memory access, make sure to bounds check any array access into a map\n",
2274 regno);
2275 return -EACCES;
2276 }
2277 err = __check_map_access(env, regno, reg->umax_value + off, size,
2278 zero_size_allowed);
2279 if (err)
2280 verbose(env, "R%d max value is outside of the array range\n",
2281 regno);
2282 
2283 if (map_value_has_spin_lock(reg->map_ptr)) {
2284 u32 lock = reg->map_ptr->spin_lock_off;
2285 
2286 /* if any part of struct bpf_spin_lock can be touched by
2287 * load/store reject this program.
2288 * To check that [x1, x2) overlaps with [y1, y2)
2289 * it is sufficient to check x1 < y2 && y1 < x2.
2290 */
2291 if (reg->smin_value + off < lock + sizeof(struct bpf_spin_lock) &&
2292 lock < reg->umax_value + off + size) {
2293 verbose(env, "bpf_spin_lock cannot be accessed directly by load/store\n");
2294 return -EACCES;
2295 }
2296 }
2297 return err;
2298 }
2299 
2300 #define MAX_PACKET_OFF 0xffff
2301 
2302 static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
2303 const struct bpf_call_arg_meta *meta,
2304 enum bpf_access_type t)
2305 {
2306 switch (env->prog->type) {
2307 /* Program types only with direct read access go here! */
2308 case BPF_PROG_TYPE_LWT_IN:
2309 case BPF_PROG_TYPE_LWT_OUT:
2310 case BPF_PROG_TYPE_LWT_SEG6LOCAL:
2311 case BPF_PROG_TYPE_SK_REUSEPORT:
2312 case BPF_PROG_TYPE_FLOW_DISSECTOR:
2313 case BPF_PROG_TYPE_CGROUP_SKB:
2314 if (t == BPF_WRITE)
2315 return false;
2316 /* fallthrough */
2317 
2318 /* Program types with direct read + write access go here! */
2319 case BPF_PROG_TYPE_SCHED_CLS:
2320 case BPF_PROG_TYPE_SCHED_ACT:
2321 case BPF_PROG_TYPE_XDP:
2322 case BPF_PROG_TYPE_LWT_XMIT:
2323 case BPF_PROG_TYPE_SK_SKB:
2324 case BPF_PROG_TYPE_SK_MSG:
2325 if (meta)
2326 return meta->pkt_access;
2327 
2328 env->seen_direct_write = true;
2329 return true;
2330 
2331 case BPF_PROG_TYPE_CGROUP_SOCKOPT:
2332 if (t == BPF_WRITE)
2333 env->seen_direct_write = true;
2334 
2335 return true;
2336 
2337 default:
2338 return false;
2339 }
2340 }
2341 
2342 static int __check_packet_access(struct bpf_verifier_env *env, u32 regno,
2343 int off, int size, bool zero_size_allowed)
2344 {
2345 struct bpf_reg_state *regs = cur_regs(env);
2346 struct bpf_reg_state *reg = &regs[regno];
2347 
2348 if (off < 0 || size < 0 || (size == 0 && !zero_size_allowed) ||
2349 (u64)off + size > reg->range) {
2350 verbose(env, "invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n",
2351 off, size, regno, reg->id, reg->off, reg->range);
2352 return -EACCES;
2353 }
2354 return 0;
2355 }
2356 
2357 static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
2358 int size, bool zero_size_allowed)
2359 {
2360 struct bpf_reg_state *regs = cur_regs(env);
2361 struct bpf_reg_state *reg = &regs[regno];
2362 int err;
2363 
2364 /* We may have added a variable offset to the packet pointer; but any
2365 * reg->range we have comes after that. We are only checking the fixed
2366 * offset.
2367 */
2368 
2369 /* We don't allow negative numbers, because we aren't tracking enough
2370 * detail to prove they're safe.
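 * (e.g. a packet pointer whose variable part could be negative might
 *  point in front of the data the range check was established for.)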
2371 */ 2372 if (reg->smin_value < 0) { 2373 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n", 2374 regno); 2375 return -EACCES; 2376 } 2377 err = __check_packet_access(env, regno, off, size, zero_size_allowed); 2378 if (err) { 2379 verbose(env, "R%d offset is outside of the packet\n", regno); 2380 return err; 2381 } 2382 2383 /* __check_packet_access has made sure "off + size - 1" is within u16. 2384 * reg->umax_value can't be bigger than MAX_PACKET_OFF which is 0xffff, 2385 * otherwise find_good_pkt_pointers would have refused to set range info 2386 * that __check_packet_access would have rejected this pkt access. 2387 * Therefore, "off + reg->umax_value + size - 1" won't overflow u32. 2388 */ 2389 env->prog->aux->max_pkt_offset = 2390 max_t(u32, env->prog->aux->max_pkt_offset, 2391 off + reg->umax_value + size - 1); 2392 2393 return err; 2394 } 2395 2396 /* check access to 'struct bpf_context' fields. Supports fixed offsets only */ 2397 static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size, 2398 enum bpf_access_type t, enum bpf_reg_type *reg_type, 2399 u32 *btf_id) 2400 { 2401 struct bpf_insn_access_aux info = { 2402 .reg_type = *reg_type, 2403 .log = &env->log, 2404 }; 2405 2406 if (env->ops->is_valid_access && 2407 env->ops->is_valid_access(off, size, t, env->prog, &info)) { 2408 /* A non zero info.ctx_field_size indicates that this field is a 2409 * candidate for later verifier transformation to load the whole 2410 * field and then apply a mask when accessed with a narrower 2411 * access than actual ctx access size. A zero info.ctx_field_size 2412 * will only allow for whole field access and rejects any other 2413 * type of narrower access. 2414 */ 2415 *reg_type = info.reg_type; 2416 2417 if (*reg_type == PTR_TO_BTF_ID) 2418 *btf_id = info.btf_id; 2419 else 2420 env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size; 2421 /* remember the offset of last byte accessed in ctx */ 2422 if (env->prog->aux->max_ctx_offset < off + size) 2423 env->prog->aux->max_ctx_offset = off + size; 2424 return 0; 2425 } 2426 2427 verbose(env, "invalid bpf_context access off=%d size=%d\n", off, size); 2428 return -EACCES; 2429 } 2430 2431 static int check_flow_keys_access(struct bpf_verifier_env *env, int off, 2432 int size) 2433 { 2434 if (size < 0 || off < 0 || 2435 (u64)off + size > sizeof(struct bpf_flow_keys)) { 2436 verbose(env, "invalid access to flow keys off=%d size=%d\n", 2437 off, size); 2438 return -EACCES; 2439 } 2440 return 0; 2441 } 2442 2443 static int check_sock_access(struct bpf_verifier_env *env, int insn_idx, 2444 u32 regno, int off, int size, 2445 enum bpf_access_type t) 2446 { 2447 struct bpf_reg_state *regs = cur_regs(env); 2448 struct bpf_reg_state *reg = ®s[regno]; 2449 struct bpf_insn_access_aux info = {}; 2450 bool valid; 2451 2452 if (reg->smin_value < 0) { 2453 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n", 2454 regno); 2455 return -EACCES; 2456 } 2457 2458 switch (reg->type) { 2459 case PTR_TO_SOCK_COMMON: 2460 valid = bpf_sock_common_is_valid_access(off, size, t, &info); 2461 break; 2462 case PTR_TO_SOCKET: 2463 valid = bpf_sock_is_valid_access(off, size, t, &info); 2464 break; 2465 case PTR_TO_TCP_SOCK: 2466 valid = bpf_tcp_sock_is_valid_access(off, size, t, &info); 2467 break; 2468 case PTR_TO_XDP_SOCK: 2469 valid = bpf_xdp_sock_is_valid_access(off, size, t, &info); 2470 break; 2471 default: 2472 valid = false; 2473 } 2474 
2475 2476 if (valid) { 2477 env->insn_aux_data[insn_idx].ctx_field_size = 2478 info.ctx_field_size; 2479 return 0; 2480 } 2481 2482 verbose(env, "R%d invalid %s access off=%d size=%d\n", 2483 regno, reg_type_str[reg->type], off, size); 2484 2485 return -EACCES; 2486 } 2487 2488 static bool __is_pointer_value(bool allow_ptr_leaks, 2489 const struct bpf_reg_state *reg) 2490 { 2491 if (allow_ptr_leaks) 2492 return false; 2493 2494 return reg->type != SCALAR_VALUE; 2495 } 2496 2497 static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno) 2498 { 2499 return cur_regs(env) + regno; 2500 } 2501 2502 static bool is_pointer_value(struct bpf_verifier_env *env, int regno) 2503 { 2504 return __is_pointer_value(env->allow_ptr_leaks, reg_state(env, regno)); 2505 } 2506 2507 static bool is_ctx_reg(struct bpf_verifier_env *env, int regno) 2508 { 2509 const struct bpf_reg_state *reg = reg_state(env, regno); 2510 2511 return reg->type == PTR_TO_CTX; 2512 } 2513 2514 static bool is_sk_reg(struct bpf_verifier_env *env, int regno) 2515 { 2516 const struct bpf_reg_state *reg = reg_state(env, regno); 2517 2518 return type_is_sk_pointer(reg->type); 2519 } 2520 2521 static bool is_pkt_reg(struct bpf_verifier_env *env, int regno) 2522 { 2523 const struct bpf_reg_state *reg = reg_state(env, regno); 2524 2525 return type_is_pkt_pointer(reg->type); 2526 } 2527 2528 static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno) 2529 { 2530 const struct bpf_reg_state *reg = reg_state(env, regno); 2531 2532 /* Separate to is_ctx_reg() since we still want to allow BPF_ST here. */ 2533 return reg->type == PTR_TO_FLOW_KEYS; 2534 } 2535 2536 static int check_pkt_ptr_alignment(struct bpf_verifier_env *env, 2537 const struct bpf_reg_state *reg, 2538 int off, int size, bool strict) 2539 { 2540 struct tnum reg_off; 2541 int ip_align; 2542 2543 /* Byte size accesses are always allowed. */ 2544 if (!strict || size == 1) 2545 return 0; 2546 2547 /* For platforms that do not have a Kconfig enabling 2548 * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the value of 2549 * NET_IP_ALIGN is universally set to '2'. And on platforms 2550 * that do set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, we get 2551 * to this code only in strict mode where we want to emulate 2552 * the NET_IP_ALIGN==2 checking. Therefore use an 2553 * unconditional IP align value of '2'. 2554 */ 2555 ip_align = 2; 2556 2557 reg_off = tnum_add(reg->var_off, tnum_const(ip_align + reg->off + off)); 2558 if (!tnum_is_aligned(reg_off, size)) { 2559 char tn_buf[48]; 2560 2561 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); 2562 verbose(env, 2563 "misaligned packet access off %d+%s+%d+%d size %d\n", 2564 ip_align, tn_buf, reg->off, off, size); 2565 return -EACCES; 2566 } 2567 2568 return 0; 2569 } 2570 2571 static int check_generic_ptr_alignment(struct bpf_verifier_env *env, 2572 const struct bpf_reg_state *reg, 2573 const char *pointer_desc, 2574 int off, int size, bool strict) 2575 { 2576 struct tnum reg_off; 2577 2578 /* Byte size accesses are always allowed. 
*/ 2579 if (!strict || size == 1) 2580 return 0; 2581 2582 reg_off = tnum_add(reg->var_off, tnum_const(reg->off + off)); 2583 if (!tnum_is_aligned(reg_off, size)) { 2584 char tn_buf[48]; 2585 2586 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); 2587 verbose(env, "misaligned %saccess off %s+%d+%d size %d\n", 2588 pointer_desc, tn_buf, reg->off, off, size); 2589 return -EACCES; 2590 } 2591 2592 return 0; 2593 } 2594 2595 static int check_ptr_alignment(struct bpf_verifier_env *env, 2596 const struct bpf_reg_state *reg, int off, 2597 int size, bool strict_alignment_once) 2598 { 2599 bool strict = env->strict_alignment || strict_alignment_once; 2600 const char *pointer_desc = ""; 2601 2602 switch (reg->type) { 2603 case PTR_TO_PACKET: 2604 case PTR_TO_PACKET_META: 2605 /* Special case, because of NET_IP_ALIGN. Given metadata sits 2606 * right in front, treat it the very same way. 2607 */ 2608 return check_pkt_ptr_alignment(env, reg, off, size, strict); 2609 case PTR_TO_FLOW_KEYS: 2610 pointer_desc = "flow keys "; 2611 break; 2612 case PTR_TO_MAP_VALUE: 2613 pointer_desc = "value "; 2614 break; 2615 case PTR_TO_CTX: 2616 pointer_desc = "context "; 2617 break; 2618 case PTR_TO_STACK: 2619 pointer_desc = "stack "; 2620 /* The stack spill tracking logic in check_stack_write() 2621 * and check_stack_read() relies on stack accesses being 2622 * aligned. 2623 */ 2624 strict = true; 2625 break; 2626 case PTR_TO_SOCKET: 2627 pointer_desc = "sock "; 2628 break; 2629 case PTR_TO_SOCK_COMMON: 2630 pointer_desc = "sock_common "; 2631 break; 2632 case PTR_TO_TCP_SOCK: 2633 pointer_desc = "tcp_sock "; 2634 break; 2635 case PTR_TO_XDP_SOCK: 2636 pointer_desc = "xdp_sock "; 2637 break; 2638 default: 2639 break; 2640 } 2641 return check_generic_ptr_alignment(env, reg, pointer_desc, off, size, 2642 strict); 2643 } 2644 2645 static int update_stack_depth(struct bpf_verifier_env *env, 2646 const struct bpf_func_state *func, 2647 int off) 2648 { 2649 u16 stack = env->subprog_info[func->subprogno].stack_depth; 2650 2651 if (stack >= -off) 2652 return 0; 2653 2654 /* update known max for given subprogram */ 2655 env->subprog_info[func->subprogno].stack_depth = -off; 2656 return 0; 2657 } 2658 2659 /* starting from main bpf function walk all instructions of the function 2660 * and recursively walk all callees that given function can call. 2661 * Ignore jump and exit insns. 2662 * Since recursion is prevented by check_cfg() this algorithm 2663 * only needs a local stack of MAX_CALL_FRAMES to remember callsites 2664 */ 2665 static int check_max_stack_depth(struct bpf_verifier_env *env) 2666 { 2667 int depth = 0, frame = 0, idx = 0, i = 0, subprog_end; 2668 struct bpf_subprog_info *subprog = env->subprog_info; 2669 struct bpf_insn *insn = env->prog->insnsi; 2670 int ret_insn[MAX_CALL_FRAMES]; 2671 int ret_prog[MAX_CALL_FRAMES]; 2672 2673 process_func: 2674 /* round up to 32-bytes, since this is granularity 2675 * of interpreter stack size 2676 */ 2677 depth += round_up(max_t(u32, subprog[idx].stack_depth, 1), 32); 2678 if (depth > MAX_BPF_STACK) { 2679 verbose(env, "combined stack size of %d calls is %d. 
Too large\n", 2680 frame + 1, depth); 2681 return -EACCES; 2682 } 2683 continue_func: 2684 subprog_end = subprog[idx + 1].start; 2685 for (; i < subprog_end; i++) { 2686 if (insn[i].code != (BPF_JMP | BPF_CALL)) 2687 continue; 2688 if (insn[i].src_reg != BPF_PSEUDO_CALL) 2689 continue; 2690 /* remember insn and function to return to */ 2691 ret_insn[frame] = i + 1; 2692 ret_prog[frame] = idx; 2693 2694 /* find the callee */ 2695 i = i + insn[i].imm + 1; 2696 idx = find_subprog(env, i); 2697 if (idx < 0) { 2698 WARN_ONCE(1, "verifier bug. No program starts at insn %d\n", 2699 i); 2700 return -EFAULT; 2701 } 2702 frame++; 2703 if (frame >= MAX_CALL_FRAMES) { 2704 verbose(env, "the call stack of %d frames is too deep !\n", 2705 frame); 2706 return -E2BIG; 2707 } 2708 goto process_func; 2709 } 2710 /* end of for() loop means the last insn of the 'subprog' 2711 * was reached. Doesn't matter whether it was JA or EXIT 2712 */ 2713 if (frame == 0) 2714 return 0; 2715 depth -= round_up(max_t(u32, subprog[idx].stack_depth, 1), 32); 2716 frame--; 2717 i = ret_insn[frame]; 2718 idx = ret_prog[frame]; 2719 goto continue_func; 2720 } 2721 2722 #ifndef CONFIG_BPF_JIT_ALWAYS_ON 2723 static int get_callee_stack_depth(struct bpf_verifier_env *env, 2724 const struct bpf_insn *insn, int idx) 2725 { 2726 int start = idx + insn->imm + 1, subprog; 2727 2728 subprog = find_subprog(env, start); 2729 if (subprog < 0) { 2730 WARN_ONCE(1, "verifier bug. No program starts at insn %d\n", 2731 start); 2732 return -EFAULT; 2733 } 2734 return env->subprog_info[subprog].stack_depth; 2735 } 2736 #endif 2737 2738 int check_ctx_reg(struct bpf_verifier_env *env, 2739 const struct bpf_reg_state *reg, int regno) 2740 { 2741 /* Access to ctx or passing it to a helper is only allowed in 2742 * its original, unmodified form. 
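 *
 * For example (a sketch): r1 += 8; r0 = *(u32 *)(r1 + 0) is rejected
 * below as a dereference of a modified ctx pointer, while
 * r0 = *(u32 *)(r1 + 8) is accepted because the offset is encoded in
 * the load instruction instead of being added to the register.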
2743 */ 2744 2745 if (reg->off) { 2746 verbose(env, "dereference of modified ctx ptr R%d off=%d disallowed\n", 2747 regno, reg->off); 2748 return -EACCES; 2749 } 2750 2751 if (!tnum_is_const(reg->var_off) || reg->var_off.value) { 2752 char tn_buf[48]; 2753 2754 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); 2755 verbose(env, "variable ctx access var_off=%s disallowed\n", tn_buf); 2756 return -EACCES; 2757 } 2758 2759 return 0; 2760 } 2761 2762 static int check_tp_buffer_access(struct bpf_verifier_env *env, 2763 const struct bpf_reg_state *reg, 2764 int regno, int off, int size) 2765 { 2766 if (off < 0) { 2767 verbose(env, 2768 "R%d invalid tracepoint buffer access: off=%d, size=%d", 2769 regno, off, size); 2770 return -EACCES; 2771 } 2772 if (!tnum_is_const(reg->var_off) || reg->var_off.value) { 2773 char tn_buf[48]; 2774 2775 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); 2776 verbose(env, 2777 "R%d invalid variable buffer offset: off=%d, var_off=%s", 2778 regno, off, tn_buf); 2779 return -EACCES; 2780 } 2781 if (off + size > env->prog->aux->max_tp_access) 2782 env->prog->aux->max_tp_access = off + size; 2783 2784 return 0; 2785 } 2786 2787 2788 /* truncate register to smaller size (in bytes) 2789 * must be called with size < BPF_REG_SIZE 2790 */ 2791 static void coerce_reg_to_size(struct bpf_reg_state *reg, int size) 2792 { 2793 u64 mask; 2794 2795 /* clear high bits in bit representation */ 2796 reg->var_off = tnum_cast(reg->var_off, size); 2797 2798 /* fix arithmetic bounds */ 2799 mask = ((u64)1 << (size * 8)) - 1; 2800 if ((reg->umin_value & ~mask) == (reg->umax_value & ~mask)) { 2801 reg->umin_value &= mask; 2802 reg->umax_value &= mask; 2803 } else { 2804 reg->umin_value = 0; 2805 reg->umax_value = mask; 2806 } 2807 reg->smin_value = reg->umin_value; 2808 reg->smax_value = reg->umax_value; 2809 } 2810 2811 static bool bpf_map_is_rdonly(const struct bpf_map *map) 2812 { 2813 return (map->map_flags & BPF_F_RDONLY_PROG) && map->frozen; 2814 } 2815 2816 static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val) 2817 { 2818 void *ptr; 2819 u64 addr; 2820 int err; 2821 2822 err = map->ops->map_direct_value_addr(map, &addr, off); 2823 if (err) 2824 return err; 2825 ptr = (void *)(long)addr + off; 2826 2827 switch (size) { 2828 case sizeof(u8): 2829 *val = (u64)*(u8 *)ptr; 2830 break; 2831 case sizeof(u16): 2832 *val = (u64)*(u16 *)ptr; 2833 break; 2834 case sizeof(u32): 2835 *val = (u64)*(u32 *)ptr; 2836 break; 2837 case sizeof(u64): 2838 *val = *(u64 *)ptr; 2839 break; 2840 default: 2841 return -EINVAL; 2842 } 2843 return 0; 2844 } 2845 2846 static int check_ptr_to_btf_access(struct bpf_verifier_env *env, 2847 struct bpf_reg_state *regs, 2848 int regno, int off, int size, 2849 enum bpf_access_type atype, 2850 int value_regno) 2851 { 2852 struct bpf_reg_state *reg = regs + regno; 2853 const struct btf_type *t = btf_type_by_id(btf_vmlinux, reg->btf_id); 2854 const char *tname = btf_name_by_offset(btf_vmlinux, t->name_off); 2855 u32 btf_id; 2856 int ret; 2857 2858 if (off < 0) { 2859 verbose(env, 2860 "R%d is ptr_%s invalid negative access: off=%d\n", 2861 regno, tname, off); 2862 return -EACCES; 2863 } 2864 if (!tnum_is_const(reg->var_off) || reg->var_off.value) { 2865 char tn_buf[48]; 2866 2867 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); 2868 verbose(env, 2869 "R%d is ptr_%s invalid variable offset: off=%d, var_off=%s\n", 2870 regno, tname, off, tn_buf); 2871 return -EACCES; 2872 } 2873 2874 if (env->ops->btf_struct_access) { 2875 ret = 
env->ops->btf_struct_access(&env->log, t, off, size,
2876 atype, &btf_id);
2877 } else {
2878 if (atype != BPF_READ) {
2879 verbose(env, "only read is supported\n");
2880 return -EACCES;
2881 }
2882 
2883 ret = btf_struct_access(&env->log, t, off, size, atype,
2884 &btf_id);
2885 }
2886 
2887 if (ret < 0)
2888 return ret;
2889 
2890 if (atype == BPF_READ) {
2891 if (ret == SCALAR_VALUE) {
2892 mark_reg_unknown(env, regs, value_regno);
2893 return 0;
2894 }
2895 mark_reg_known_zero(env, regs, value_regno);
2896 regs[value_regno].type = PTR_TO_BTF_ID;
2897 regs[value_regno].btf_id = btf_id;
2898 }
2899 
2900 return 0;
2901 }
2902 
2903 /* check whether memory at (regno + off) is accessible for t = (read | write)
2904 * if t==write, value_regno is a register whose value is stored into memory
2905 * if t==read, value_regno is a register which will receive the value from memory
2906 * if t==write && value_regno==-1, some unknown value is stored into memory
2907 * if t==read && value_regno==-1, don't care what we read from memory
2908 */
2909 static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno,
2910 int off, int bpf_size, enum bpf_access_type t,
2911 int value_regno, bool strict_alignment_once)
2912 {
2913 struct bpf_reg_state *regs = cur_regs(env);
2914 struct bpf_reg_state *reg = regs + regno;
2915 struct bpf_func_state *state;
2916 int size, err = 0;
2917 
2918 size = bpf_size_to_bytes(bpf_size);
2919 if (size < 0)
2920 return size;
2921 
2922 /* alignment checks will add in reg->off themselves */
2923 err = check_ptr_alignment(env, reg, off, size, strict_alignment_once);
2924 if (err)
2925 return err;
2926 
2927 /* for access checks, reg->off is just part of off */
2928 off += reg->off;
2929 
2930 if (reg->type == PTR_TO_MAP_VALUE) {
2931 if (t == BPF_WRITE && value_regno >= 0 &&
2932 is_pointer_value(env, value_regno)) {
2933 verbose(env, "R%d leaks addr into map\n", value_regno);
2934 return -EACCES;
2935 }
2936 err = check_map_access_type(env, regno, off, size, t);
2937 if (err)
2938 return err;
2939 err = check_map_access(env, regno, off, size, false);
2940 if (!err && t == BPF_READ && value_regno >= 0) {
2941 struct bpf_map *map = reg->map_ptr;
2942 
2943 /* if map is read-only, track its contents as scalars */
2944 if (tnum_is_const(reg->var_off) &&
2945 bpf_map_is_rdonly(map) &&
2946 map->ops->map_direct_value_addr) {
2947 int map_off = off + reg->var_off.value;
2948 u64 val = 0;
2949 
2950 err = bpf_map_direct_read(map, map_off, size,
2951 &val);
2952 if (err)
2953 return err;
2954 
2955 regs[value_regno].type = SCALAR_VALUE;
2956 __mark_reg_known(&regs[value_regno], val);
2957 } else {
2958 mark_reg_unknown(env, regs, value_regno);
2959 }
2960 }
2961 } else if (reg->type == PTR_TO_CTX) {
2962 enum bpf_reg_type reg_type = SCALAR_VALUE;
2963 u32 btf_id = 0;
2964 
2965 if (t == BPF_WRITE && value_regno >= 0 &&
2966 is_pointer_value(env, value_regno)) {
2967 verbose(env, "R%d leaks addr into ctx\n", value_regno);
2968 return -EACCES;
2969 }
2970 
2971 err = check_ctx_reg(env, reg, regno);
2972 if (err < 0)
2973 return err;
2974 
2975 err = check_ctx_access(env, insn_idx, off, size, t, &reg_type, &btf_id);
2976 if (err)
2977 verbose_linfo(env, insn_idx, "; ");
2978 if (!err && t == BPF_READ && value_regno >= 0) {
2979 /* ctx access returns either a scalar, or a
2980 * PTR_TO_PACKET[_META,_END]. In the latter
2981 * case, we know the offset is zero.
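 * (For instance, in cls/act programs the is_valid_access() callback
 *  reports PTR_TO_PACKET / PTR_TO_PACKET_END for loads of skb->data /
 *  skb->data_end, while most other ctx fields come back as
 *  SCALAR_VALUE.)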
2982 */ 2983 if (reg_type == SCALAR_VALUE) { 2984 mark_reg_unknown(env, regs, value_regno); 2985 } else { 2986 mark_reg_known_zero(env, regs, 2987 value_regno); 2988 if (reg_type_may_be_null(reg_type)) 2989 regs[value_regno].id = ++env->id_gen; 2990 /* A load of ctx field could have different 2991 * actual load size with the one encoded in the 2992 * insn. When the dst is PTR, it is for sure not 2993 * a sub-register. 2994 */ 2995 regs[value_regno].subreg_def = DEF_NOT_SUBREG; 2996 if (reg_type == PTR_TO_BTF_ID) 2997 regs[value_regno].btf_id = btf_id; 2998 } 2999 regs[value_regno].type = reg_type; 3000 } 3001 3002 } else if (reg->type == PTR_TO_STACK) { 3003 off += reg->var_off.value; 3004 err = check_stack_access(env, reg, off, size); 3005 if (err) 3006 return err; 3007 3008 state = func(env, reg); 3009 err = update_stack_depth(env, state, off); 3010 if (err) 3011 return err; 3012 3013 if (t == BPF_WRITE) 3014 err = check_stack_write(env, state, off, size, 3015 value_regno, insn_idx); 3016 else 3017 err = check_stack_read(env, state, off, size, 3018 value_regno); 3019 } else if (reg_is_pkt_pointer(reg)) { 3020 if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) { 3021 verbose(env, "cannot write into packet\n"); 3022 return -EACCES; 3023 } 3024 if (t == BPF_WRITE && value_regno >= 0 && 3025 is_pointer_value(env, value_regno)) { 3026 verbose(env, "R%d leaks addr into packet\n", 3027 value_regno); 3028 return -EACCES; 3029 } 3030 err = check_packet_access(env, regno, off, size, false); 3031 if (!err && t == BPF_READ && value_regno >= 0) 3032 mark_reg_unknown(env, regs, value_regno); 3033 } else if (reg->type == PTR_TO_FLOW_KEYS) { 3034 if (t == BPF_WRITE && value_regno >= 0 && 3035 is_pointer_value(env, value_regno)) { 3036 verbose(env, "R%d leaks addr into flow keys\n", 3037 value_regno); 3038 return -EACCES; 3039 } 3040 3041 err = check_flow_keys_access(env, off, size); 3042 if (!err && t == BPF_READ && value_regno >= 0) 3043 mark_reg_unknown(env, regs, value_regno); 3044 } else if (type_is_sk_pointer(reg->type)) { 3045 if (t == BPF_WRITE) { 3046 verbose(env, "R%d cannot write into %s\n", 3047 regno, reg_type_str[reg->type]); 3048 return -EACCES; 3049 } 3050 err = check_sock_access(env, insn_idx, regno, off, size, t); 3051 if (!err && value_regno >= 0) 3052 mark_reg_unknown(env, regs, value_regno); 3053 } else if (reg->type == PTR_TO_TP_BUFFER) { 3054 err = check_tp_buffer_access(env, reg, regno, off, size); 3055 if (!err && t == BPF_READ && value_regno >= 0) 3056 mark_reg_unknown(env, regs, value_regno); 3057 } else if (reg->type == PTR_TO_BTF_ID) { 3058 err = check_ptr_to_btf_access(env, regs, regno, off, size, t, 3059 value_regno); 3060 } else { 3061 verbose(env, "R%d invalid mem access '%s'\n", regno, 3062 reg_type_str[reg->type]); 3063 return -EACCES; 3064 } 3065 3066 if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ && 3067 regs[value_regno].type == SCALAR_VALUE) { 3068 /* b/h/w load zero-extends, mark upper bits as known 0 */ 3069 coerce_reg_to_size(®s[value_regno], size); 3070 } 3071 return err; 3072 } 3073 3074 static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn) 3075 { 3076 int err; 3077 3078 if ((BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) || 3079 insn->imm != 0) { 3080 verbose(env, "BPF_XADD uses reserved fields\n"); 3081 return -EINVAL; 3082 } 3083 3084 /* check src1 operand */ 3085 err = check_reg_arg(env, insn->src_reg, SRC_OP); 3086 if (err) 3087 return err; 3088 3089 /* check src2 
operand */ 3090 err = check_reg_arg(env, insn->dst_reg, SRC_OP); 3091 if (err) 3092 return err; 3093 3094 if (is_pointer_value(env, insn->src_reg)) { 3095 verbose(env, "R%d leaks addr into mem\n", insn->src_reg); 3096 return -EACCES; 3097 } 3098 3099 if (is_ctx_reg(env, insn->dst_reg) || 3100 is_pkt_reg(env, insn->dst_reg) || 3101 is_flow_key_reg(env, insn->dst_reg) || 3102 is_sk_reg(env, insn->dst_reg)) { 3103 verbose(env, "BPF_XADD stores into R%d %s is not allowed\n", 3104 insn->dst_reg, 3105 reg_type_str[reg_state(env, insn->dst_reg)->type]); 3106 return -EACCES; 3107 } 3108 3109 /* check whether atomic_add can read the memory */ 3110 err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, 3111 BPF_SIZE(insn->code), BPF_READ, -1, true); 3112 if (err) 3113 return err; 3114 3115 /* check whether atomic_add can write into the same memory */ 3116 return check_mem_access(env, insn_idx, insn->dst_reg, insn->off, 3117 BPF_SIZE(insn->code), BPF_WRITE, -1, true); 3118 } 3119 3120 static int __check_stack_boundary(struct bpf_verifier_env *env, u32 regno, 3121 int off, int access_size, 3122 bool zero_size_allowed) 3123 { 3124 struct bpf_reg_state *reg = reg_state(env, regno); 3125 3126 if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 || 3127 access_size < 0 || (access_size == 0 && !zero_size_allowed)) { 3128 if (tnum_is_const(reg->var_off)) { 3129 verbose(env, "invalid stack type R%d off=%d access_size=%d\n", 3130 regno, off, access_size); 3131 } else { 3132 char tn_buf[48]; 3133 3134 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); 3135 verbose(env, "invalid stack type R%d var_off=%s access_size=%d\n", 3136 regno, tn_buf, access_size); 3137 } 3138 return -EACCES; 3139 } 3140 return 0; 3141 } 3142 3143 /* when register 'regno' is passed into function that will read 'access_size' 3144 * bytes from that pointer, make sure that it's within stack boundary 3145 * and all elements of stack are initialized. 3146 * Unlike most pointer bounds-checking functions, this one doesn't take an 3147 * 'off' argument, so it has to add in reg->off itself. 3148 */ 3149 static int check_stack_boundary(struct bpf_verifier_env *env, int regno, 3150 int access_size, bool zero_size_allowed, 3151 struct bpf_call_arg_meta *meta) 3152 { 3153 struct bpf_reg_state *reg = reg_state(env, regno); 3154 struct bpf_func_state *state = func(env, reg); 3155 int err, min_off, max_off, i, j, slot, spi; 3156 3157 if (reg->type != PTR_TO_STACK) { 3158 /* Allow zero-byte read from NULL, regardless of pointer type */ 3159 if (zero_size_allowed && access_size == 0 && 3160 register_is_null(reg)) 3161 return 0; 3162 3163 verbose(env, "R%d type=%s expected=%s\n", regno, 3164 reg_type_str[reg->type], 3165 reg_type_str[PTR_TO_STACK]); 3166 return -EACCES; 3167 } 3168 3169 if (tnum_is_const(reg->var_off)) { 3170 min_off = max_off = reg->var_off.value + reg->off; 3171 err = __check_stack_boundary(env, regno, min_off, access_size, 3172 zero_size_allowed); 3173 if (err) 3174 return err; 3175 } else { 3176 /* Variable offset is prohibited for unprivileged mode for 3177 * simplicity since it requires corresponding support in 3178 * Spectre masking for stack ALU. 3179 * See also retrieve_ptr_limit(). 
3180 */ 3181 if (!env->allow_ptr_leaks) { 3182 char tn_buf[48]; 3183 3184 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); 3185 verbose(env, "R%d indirect variable offset stack access prohibited for !root, var_off=%s\n", 3186 regno, tn_buf); 3187 return -EACCES; 3188 } 3189 /* Only initialized buffer on stack is allowed to be accessed 3190 * with variable offset. With uninitialized buffer it's hard to 3191 * guarantee that whole memory is marked as initialized on 3192 * helper return since specific bounds are unknown what may 3193 * cause uninitialized stack leaking. 3194 */ 3195 if (meta && meta->raw_mode) 3196 meta = NULL; 3197 3198 if (reg->smax_value >= BPF_MAX_VAR_OFF || 3199 reg->smax_value <= -BPF_MAX_VAR_OFF) { 3200 verbose(env, "R%d unbounded indirect variable offset stack access\n", 3201 regno); 3202 return -EACCES; 3203 } 3204 min_off = reg->smin_value + reg->off; 3205 max_off = reg->smax_value + reg->off; 3206 err = __check_stack_boundary(env, regno, min_off, access_size, 3207 zero_size_allowed); 3208 if (err) { 3209 verbose(env, "R%d min value is outside of stack bound\n", 3210 regno); 3211 return err; 3212 } 3213 err = __check_stack_boundary(env, regno, max_off, access_size, 3214 zero_size_allowed); 3215 if (err) { 3216 verbose(env, "R%d max value is outside of stack bound\n", 3217 regno); 3218 return err; 3219 } 3220 } 3221 3222 if (meta && meta->raw_mode) { 3223 meta->access_size = access_size; 3224 meta->regno = regno; 3225 return 0; 3226 } 3227 3228 for (i = min_off; i < max_off + access_size; i++) { 3229 u8 *stype; 3230 3231 slot = -i - 1; 3232 spi = slot / BPF_REG_SIZE; 3233 if (state->allocated_stack <= slot) 3234 goto err; 3235 stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE]; 3236 if (*stype == STACK_MISC) 3237 goto mark; 3238 if (*stype == STACK_ZERO) { 3239 /* helper can write anything into the stack */ 3240 *stype = STACK_MISC; 3241 goto mark; 3242 } 3243 if (state->stack[spi].slot_type[0] == STACK_SPILL && 3244 state->stack[spi].spilled_ptr.type == SCALAR_VALUE) { 3245 __mark_reg_unknown(env, &state->stack[spi].spilled_ptr); 3246 for (j = 0; j < BPF_REG_SIZE; j++) 3247 state->stack[spi].slot_type[j] = STACK_MISC; 3248 goto mark; 3249 } 3250 3251 err: 3252 if (tnum_is_const(reg->var_off)) { 3253 verbose(env, "invalid indirect read from stack off %d+%d size %d\n", 3254 min_off, i - min_off, access_size); 3255 } else { 3256 char tn_buf[48]; 3257 3258 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); 3259 verbose(env, "invalid indirect read from stack var_off %s+%d size %d\n", 3260 tn_buf, i - min_off, access_size); 3261 } 3262 return -EACCES; 3263 mark: 3264 /* reading any byte out of 8-byte 'spill_slot' will cause 3265 * the whole slot to be marked as 'read' 3266 */ 3267 mark_reg_read(env, &state->stack[spi].spilled_ptr, 3268 state->stack[spi].spilled_ptr.parent, 3269 REG_LIVE_READ64); 3270 } 3271 return update_stack_depth(env, state, min_off); 3272 } 3273 3274 static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, 3275 int access_size, bool zero_size_allowed, 3276 struct bpf_call_arg_meta *meta) 3277 { 3278 struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[regno]; 3279 3280 switch (reg->type) { 3281 case PTR_TO_PACKET: 3282 case PTR_TO_PACKET_META: 3283 return check_packet_access(env, regno, reg->off, access_size, 3284 zero_size_allowed); 3285 case PTR_TO_MAP_VALUE: 3286 if (check_map_access_type(env, regno, reg->off, access_size, 3287 meta && meta->raw_mode ? 
BPF_WRITE :
3288 BPF_READ))
3289 return -EACCES;
3290 return check_map_access(env, regno, reg->off, access_size,
3291 zero_size_allowed);
3292 default: /* scalar_value|ptr_to_stack or invalid ptr */
3293 return check_stack_boundary(env, regno, access_size,
3294 zero_size_allowed, meta);
3295 }
3296 }
3297 
3298 /* Implementation details:
3299 * bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL
3300 * Two bpf_map_lookups (even with the same key) will have different reg->id.
3301 * For traditional PTR_TO_MAP_VALUE the verifier clears reg->id after
3302 * value_or_null->value transition, since the verifier only cares about
3303 * the range of access to valid map value pointer and doesn't care about actual
3304 * address of the map element.
3305 * For maps with 'struct bpf_spin_lock' inside map value the verifier keeps
3306 * reg->id > 0 after value_or_null->value transition. By doing so
3307 * two bpf_map_lookups will be considered two different pointers that
3308 * point to different bpf_spin_locks.
3309 * The verifier allows taking only one bpf_spin_lock at a time to avoid
3310 * dead-locks.
3311 * Since only one bpf_spin_lock is allowed the checks are simpler than
3312 * reg_is_refcounted() logic. The verifier needs to remember only
3313 * one spin_lock instead of array of acquired_refs.
3314 * cur_state->active_spin_lock remembers which map value element got locked
3315 * and clears it after bpf_spin_unlock.
3316 */
3317 static int process_spin_lock(struct bpf_verifier_env *env, int regno,
3318 bool is_lock)
3319 {
3320 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
3321 struct bpf_verifier_state *cur = env->cur_state;
3322 bool is_const = tnum_is_const(reg->var_off);
3323 struct bpf_map *map = reg->map_ptr;
3324 u64 val = reg->var_off.value;
3325 
3326 if (reg->type != PTR_TO_MAP_VALUE) {
3327 verbose(env, "R%d is not a pointer to map_value\n", regno);
3328 return -EINVAL;
3329 }
3330 if (!is_const) {
3331 verbose(env,
3332 "R%d doesn't have constant offset. 
bpf_spin_lock has to be at the constant offset\n", 3333 regno); 3334 return -EINVAL; 3335 } 3336 if (!map->btf) { 3337 verbose(env, 3338 "map '%s' has to have BTF in order to use bpf_spin_lock\n", 3339 map->name); 3340 return -EINVAL; 3341 } 3342 if (!map_value_has_spin_lock(map)) { 3343 if (map->spin_lock_off == -E2BIG) 3344 verbose(env, 3345 "map '%s' has more than one 'struct bpf_spin_lock'\n", 3346 map->name); 3347 else if (map->spin_lock_off == -ENOENT) 3348 verbose(env, 3349 "map '%s' doesn't have 'struct bpf_spin_lock'\n", 3350 map->name); 3351 else 3352 verbose(env, 3353 "map '%s' is not a struct type or bpf_spin_lock is mangled\n", 3354 map->name); 3355 return -EINVAL; 3356 } 3357 if (map->spin_lock_off != val + reg->off) { 3358 verbose(env, "off %lld doesn't point to 'struct bpf_spin_lock'\n", 3359 val + reg->off); 3360 return -EINVAL; 3361 } 3362 if (is_lock) { 3363 if (cur->active_spin_lock) { 3364 verbose(env, 3365 "Locking two bpf_spin_locks are not allowed\n"); 3366 return -EINVAL; 3367 } 3368 cur->active_spin_lock = reg->id; 3369 } else { 3370 if (!cur->active_spin_lock) { 3371 verbose(env, "bpf_spin_unlock without taking a lock\n"); 3372 return -EINVAL; 3373 } 3374 if (cur->active_spin_lock != reg->id) { 3375 verbose(env, "bpf_spin_unlock of different lock\n"); 3376 return -EINVAL; 3377 } 3378 cur->active_spin_lock = 0; 3379 } 3380 return 0; 3381 } 3382 3383 static bool arg_type_is_mem_ptr(enum bpf_arg_type type) 3384 { 3385 return type == ARG_PTR_TO_MEM || 3386 type == ARG_PTR_TO_MEM_OR_NULL || 3387 type == ARG_PTR_TO_UNINIT_MEM; 3388 } 3389 3390 static bool arg_type_is_mem_size(enum bpf_arg_type type) 3391 { 3392 return type == ARG_CONST_SIZE || 3393 type == ARG_CONST_SIZE_OR_ZERO; 3394 } 3395 3396 static bool arg_type_is_int_ptr(enum bpf_arg_type type) 3397 { 3398 return type == ARG_PTR_TO_INT || 3399 type == ARG_PTR_TO_LONG; 3400 } 3401 3402 static int int_ptr_type_to_size(enum bpf_arg_type type) 3403 { 3404 if (type == ARG_PTR_TO_INT) 3405 return sizeof(u32); 3406 else if (type == ARG_PTR_TO_LONG) 3407 return sizeof(u64); 3408 3409 return -EINVAL; 3410 } 3411 3412 static int check_func_arg(struct bpf_verifier_env *env, u32 regno, 3413 enum bpf_arg_type arg_type, 3414 struct bpf_call_arg_meta *meta) 3415 { 3416 struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[regno]; 3417 enum bpf_reg_type expected_type, type = reg->type; 3418 int err = 0; 3419 3420 if (arg_type == ARG_DONTCARE) 3421 return 0; 3422 3423 err = check_reg_arg(env, regno, SRC_OP); 3424 if (err) 3425 return err; 3426 3427 if (arg_type == ARG_ANYTHING) { 3428 if (is_pointer_value(env, regno)) { 3429 verbose(env, "R%d leaks addr into helper function\n", 3430 regno); 3431 return -EACCES; 3432 } 3433 return 0; 3434 } 3435 3436 if (type_is_pkt_pointer(type) && 3437 !may_access_direct_pkt_data(env, meta, BPF_READ)) { 3438 verbose(env, "helper access to the packet is not allowed\n"); 3439 return -EACCES; 3440 } 3441 3442 if (arg_type == ARG_PTR_TO_MAP_KEY || 3443 arg_type == ARG_PTR_TO_MAP_VALUE || 3444 arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE || 3445 arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL) { 3446 expected_type = PTR_TO_STACK; 3447 if (register_is_null(reg) && 3448 arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL) 3449 /* final test in check_stack_boundary() */; 3450 else if (!type_is_pkt_pointer(type) && 3451 type != PTR_TO_MAP_VALUE && 3452 type != expected_type) 3453 goto err_type; 3454 } else if (arg_type == ARG_CONST_SIZE || 3455 arg_type == ARG_CONST_SIZE_OR_ZERO) { 3456 expected_type = SCALAR_VALUE; 3457 if 
(type != expected_type) 3458 goto err_type; 3459 } else if (arg_type == ARG_CONST_MAP_PTR) { 3460 expected_type = CONST_PTR_TO_MAP; 3461 if (type != expected_type) 3462 goto err_type; 3463 } else if (arg_type == ARG_PTR_TO_CTX) { 3464 expected_type = PTR_TO_CTX; 3465 if (type != expected_type) 3466 goto err_type; 3467 err = check_ctx_reg(env, reg, regno); 3468 if (err < 0) 3469 return err; 3470 } else if (arg_type == ARG_PTR_TO_SOCK_COMMON) { 3471 expected_type = PTR_TO_SOCK_COMMON; 3472 /* Any sk pointer can be ARG_PTR_TO_SOCK_COMMON */ 3473 if (!type_is_sk_pointer(type)) 3474 goto err_type; 3475 if (reg->ref_obj_id) { 3476 if (meta->ref_obj_id) { 3477 verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n", 3478 regno, reg->ref_obj_id, 3479 meta->ref_obj_id); 3480 return -EFAULT; 3481 } 3482 meta->ref_obj_id = reg->ref_obj_id; 3483 } 3484 } else if (arg_type == ARG_PTR_TO_SOCKET) { 3485 expected_type = PTR_TO_SOCKET; 3486 if (type != expected_type) 3487 goto err_type; 3488 } else if (arg_type == ARG_PTR_TO_BTF_ID) { 3489 expected_type = PTR_TO_BTF_ID; 3490 if (type != expected_type) 3491 goto err_type; 3492 if (reg->btf_id != meta->btf_id) { 3493 verbose(env, "Helper has type %s got %s in R%d\n", 3494 kernel_type_name(meta->btf_id), 3495 kernel_type_name(reg->btf_id), regno); 3496 3497 return -EACCES; 3498 } 3499 if (!tnum_is_const(reg->var_off) || reg->var_off.value || reg->off) { 3500 verbose(env, "R%d is a pointer to in-kernel struct with non-zero offset\n", 3501 regno); 3502 return -EACCES; 3503 } 3504 } else if (arg_type == ARG_PTR_TO_SPIN_LOCK) { 3505 if (meta->func_id == BPF_FUNC_spin_lock) { 3506 if (process_spin_lock(env, regno, true)) 3507 return -EACCES; 3508 } else if (meta->func_id == BPF_FUNC_spin_unlock) { 3509 if (process_spin_lock(env, regno, false)) 3510 return -EACCES; 3511 } else { 3512 verbose(env, "verifier internal error\n"); 3513 return -EFAULT; 3514 } 3515 } else if (arg_type_is_mem_ptr(arg_type)) { 3516 expected_type = PTR_TO_STACK; 3517 /* One exception here. In case function allows for NULL to be 3518 * passed in as argument, it's a SCALAR_VALUE type. Final test 3519 * happens during stack boundary checking. 3520 */ 3521 if (register_is_null(reg) && 3522 arg_type == ARG_PTR_TO_MEM_OR_NULL) 3523 /* final test in check_stack_boundary() */; 3524 else if (!type_is_pkt_pointer(type) && 3525 type != PTR_TO_MAP_VALUE && 3526 type != expected_type) 3527 goto err_type; 3528 meta->raw_mode = arg_type == ARG_PTR_TO_UNINIT_MEM; 3529 } else if (arg_type_is_int_ptr(arg_type)) { 3530 expected_type = PTR_TO_STACK; 3531 if (!type_is_pkt_pointer(type) && 3532 type != PTR_TO_MAP_VALUE && 3533 type != expected_type) 3534 goto err_type; 3535 } else { 3536 verbose(env, "unsupported arg_type %d\n", arg_type); 3537 return -EFAULT; 3538 } 3539 3540 if (arg_type == ARG_CONST_MAP_PTR) { 3541 /* bpf_map_xxx(map_ptr) call: remember that map_ptr */ 3542 meta->map_ptr = reg->map_ptr; 3543 } else if (arg_type == ARG_PTR_TO_MAP_KEY) { 3544 /* bpf_map_xxx(..., map_ptr, ..., key) call: 3545 * check that [key, key + map->key_size) are within 3546 * stack limits and initialized 3547 */ 3548 if (!meta->map_ptr) { 3549 /* in function declaration map_ptr must come before 3550 * map_key, so that it's verified and known before 3551 * we have to check map_key here. 
Otherwise it means 3552 * that kernel subsystem misconfigured verifier 3553 */ 3554 verbose(env, "invalid map_ptr to access map->key\n"); 3555 return -EACCES; 3556 } 3557 err = check_helper_mem_access(env, regno, 3558 meta->map_ptr->key_size, false, 3559 NULL); 3560 } else if (arg_type == ARG_PTR_TO_MAP_VALUE || 3561 (arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL && 3562 !register_is_null(reg)) || 3563 arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE) { 3564 /* bpf_map_xxx(..., map_ptr, ..., value) call: 3565 * check [value, value + map->value_size) validity 3566 */ 3567 if (!meta->map_ptr) { 3568 /* kernel subsystem misconfigured verifier */ 3569 verbose(env, "invalid map_ptr to access map->value\n"); 3570 return -EACCES; 3571 } 3572 meta->raw_mode = (arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE); 3573 err = check_helper_mem_access(env, regno, 3574 meta->map_ptr->value_size, false, 3575 meta); 3576 } else if (arg_type_is_mem_size(arg_type)) { 3577 bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO); 3578 3579 /* remember the mem_size which may be used later 3580 * to refine return values. 3581 */ 3582 meta->msize_smax_value = reg->smax_value; 3583 meta->msize_umax_value = reg->umax_value; 3584 3585 /* The register is SCALAR_VALUE; the access check 3586 * happens using its boundaries. 3587 */ 3588 if (!tnum_is_const(reg->var_off)) 3589 /* For unprivileged variable accesses, disable raw 3590 * mode so that the program is required to 3591 * initialize all the memory that the helper could 3592 * just partially fill up. 3593 */ 3594 meta = NULL; 3595 3596 if (reg->smin_value < 0) { 3597 verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n", 3598 regno); 3599 return -EACCES; 3600 } 3601 3602 if (reg->umin_value == 0) { 3603 err = check_helper_mem_access(env, regno - 1, 0, 3604 zero_size_allowed, 3605 meta); 3606 if (err) 3607 return err; 3608 } 3609 3610 if (reg->umax_value >= BPF_MAX_VAR_SIZ) { 3611 verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n", 3612 regno); 3613 return -EACCES; 3614 } 3615 err = check_helper_mem_access(env, regno - 1, 3616 reg->umax_value, 3617 zero_size_allowed, meta); 3618 if (!err) 3619 err = mark_chain_precision(env, regno); 3620 } else if (arg_type_is_int_ptr(arg_type)) { 3621 int size = int_ptr_type_to_size(arg_type); 3622 3623 err = check_helper_mem_access(env, regno, size, false, meta); 3624 if (err) 3625 return err; 3626 err = check_ptr_alignment(env, reg, 0, size, true); 3627 } 3628 3629 return err; 3630 err_type: 3631 verbose(env, "R%d type=%s expected=%s\n", regno, 3632 reg_type_str[type], reg_type_str[expected_type]); 3633 return -EACCES; 3634 } 3635 3636 static int check_map_func_compatibility(struct bpf_verifier_env *env, 3637 struct bpf_map *map, int func_id) 3638 { 3639 if (!map) 3640 return 0; 3641 3642 /* We need a two way check, first is from map perspective ... 
*/ 3643 switch (map->map_type) { 3644 case BPF_MAP_TYPE_PROG_ARRAY: 3645 if (func_id != BPF_FUNC_tail_call) 3646 goto error; 3647 break; 3648 case BPF_MAP_TYPE_PERF_EVENT_ARRAY: 3649 if (func_id != BPF_FUNC_perf_event_read && 3650 func_id != BPF_FUNC_perf_event_output && 3651 func_id != BPF_FUNC_skb_output && 3652 func_id != BPF_FUNC_perf_event_read_value) 3653 goto error; 3654 break; 3655 case BPF_MAP_TYPE_STACK_TRACE: 3656 if (func_id != BPF_FUNC_get_stackid) 3657 goto error; 3658 break; 3659 case BPF_MAP_TYPE_CGROUP_ARRAY: 3660 if (func_id != BPF_FUNC_skb_under_cgroup && 3661 func_id != BPF_FUNC_current_task_under_cgroup) 3662 goto error; 3663 break; 3664 case BPF_MAP_TYPE_CGROUP_STORAGE: 3665 case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE: 3666 if (func_id != BPF_FUNC_get_local_storage) 3667 goto error; 3668 break; 3669 case BPF_MAP_TYPE_DEVMAP: 3670 case BPF_MAP_TYPE_DEVMAP_HASH: 3671 if (func_id != BPF_FUNC_redirect_map && 3672 func_id != BPF_FUNC_map_lookup_elem) 3673 goto error; 3674 break; 3675 /* Restrict bpf side of cpumap and xskmap, open when use-cases 3676 * appear. 3677 */ 3678 case BPF_MAP_TYPE_CPUMAP: 3679 if (func_id != BPF_FUNC_redirect_map) 3680 goto error; 3681 break; 3682 case BPF_MAP_TYPE_XSKMAP: 3683 if (func_id != BPF_FUNC_redirect_map && 3684 func_id != BPF_FUNC_map_lookup_elem) 3685 goto error; 3686 break; 3687 case BPF_MAP_TYPE_ARRAY_OF_MAPS: 3688 case BPF_MAP_TYPE_HASH_OF_MAPS: 3689 if (func_id != BPF_FUNC_map_lookup_elem) 3690 goto error; 3691 break; 3692 case BPF_MAP_TYPE_SOCKMAP: 3693 if (func_id != BPF_FUNC_sk_redirect_map && 3694 func_id != BPF_FUNC_sock_map_update && 3695 func_id != BPF_FUNC_map_delete_elem && 3696 func_id != BPF_FUNC_msg_redirect_map) 3697 goto error; 3698 break; 3699 case BPF_MAP_TYPE_SOCKHASH: 3700 if (func_id != BPF_FUNC_sk_redirect_hash && 3701 func_id != BPF_FUNC_sock_hash_update && 3702 func_id != BPF_FUNC_map_delete_elem && 3703 func_id != BPF_FUNC_msg_redirect_hash) 3704 goto error; 3705 break; 3706 case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY: 3707 if (func_id != BPF_FUNC_sk_select_reuseport) 3708 goto error; 3709 break; 3710 case BPF_MAP_TYPE_QUEUE: 3711 case BPF_MAP_TYPE_STACK: 3712 if (func_id != BPF_FUNC_map_peek_elem && 3713 func_id != BPF_FUNC_map_pop_elem && 3714 func_id != BPF_FUNC_map_push_elem) 3715 goto error; 3716 break; 3717 case BPF_MAP_TYPE_SK_STORAGE: 3718 if (func_id != BPF_FUNC_sk_storage_get && 3719 func_id != BPF_FUNC_sk_storage_delete) 3720 goto error; 3721 break; 3722 default: 3723 break; 3724 } 3725 3726 /* ... and second from the function itself. 
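 *
 * A combination that fails either switch is rejected with the error at
 * the bottom of this function. As an illustrative sketch (not code from
 * this file; prog_array_fd is a placeholder map fd), loading a
 * BPF_MAP_TYPE_PROG_ARRAY and calling bpf_map_lookup_elem() on it:
 *   BPF_LD_MAP_FD(BPF_REG_1, prog_array_fd),
 *   BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
 *   BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
 *   BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
 *   BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
 * trips the map-side switch above (prog arrays only accept
 * bpf_tail_call) and is refused with
 * "cannot pass map_type ... into func bpf_map_lookup_elem".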
*/ 3727 switch (func_id) { 3728 case BPF_FUNC_tail_call: 3729 if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY) 3730 goto error; 3731 if (env->subprog_cnt > 1) { 3732 verbose(env, "tail_calls are not allowed in programs with bpf-to-bpf calls\n"); 3733 return -EINVAL; 3734 } 3735 break; 3736 case BPF_FUNC_perf_event_read: 3737 case BPF_FUNC_perf_event_output: 3738 case BPF_FUNC_perf_event_read_value: 3739 case BPF_FUNC_skb_output: 3740 if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) 3741 goto error; 3742 break; 3743 case BPF_FUNC_get_stackid: 3744 if (map->map_type != BPF_MAP_TYPE_STACK_TRACE) 3745 goto error; 3746 break; 3747 case BPF_FUNC_current_task_under_cgroup: 3748 case BPF_FUNC_skb_under_cgroup: 3749 if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY) 3750 goto error; 3751 break; 3752 case BPF_FUNC_redirect_map: 3753 if (map->map_type != BPF_MAP_TYPE_DEVMAP && 3754 map->map_type != BPF_MAP_TYPE_DEVMAP_HASH && 3755 map->map_type != BPF_MAP_TYPE_CPUMAP && 3756 map->map_type != BPF_MAP_TYPE_XSKMAP) 3757 goto error; 3758 break; 3759 case BPF_FUNC_sk_redirect_map: 3760 case BPF_FUNC_msg_redirect_map: 3761 case BPF_FUNC_sock_map_update: 3762 if (map->map_type != BPF_MAP_TYPE_SOCKMAP) 3763 goto error; 3764 break; 3765 case BPF_FUNC_sk_redirect_hash: 3766 case BPF_FUNC_msg_redirect_hash: 3767 case BPF_FUNC_sock_hash_update: 3768 if (map->map_type != BPF_MAP_TYPE_SOCKHASH) 3769 goto error; 3770 break; 3771 case BPF_FUNC_get_local_storage: 3772 if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE && 3773 map->map_type != BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) 3774 goto error; 3775 break; 3776 case BPF_FUNC_sk_select_reuseport: 3777 if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) 3778 goto error; 3779 break; 3780 case BPF_FUNC_map_peek_elem: 3781 case BPF_FUNC_map_pop_elem: 3782 case BPF_FUNC_map_push_elem: 3783 if (map->map_type != BPF_MAP_TYPE_QUEUE && 3784 map->map_type != BPF_MAP_TYPE_STACK) 3785 goto error; 3786 break; 3787 case BPF_FUNC_sk_storage_get: 3788 case BPF_FUNC_sk_storage_delete: 3789 if (map->map_type != BPF_MAP_TYPE_SK_STORAGE) 3790 goto error; 3791 break; 3792 default: 3793 break; 3794 } 3795 3796 return 0; 3797 error: 3798 verbose(env, "cannot pass map_type %d into func %s#%d\n", 3799 map->map_type, func_id_name(func_id), func_id); 3800 return -EINVAL; 3801 } 3802 3803 static bool check_raw_mode_ok(const struct bpf_func_proto *fn) 3804 { 3805 int count = 0; 3806 3807 if (fn->arg1_type == ARG_PTR_TO_UNINIT_MEM) 3808 count++; 3809 if (fn->arg2_type == ARG_PTR_TO_UNINIT_MEM) 3810 count++; 3811 if (fn->arg3_type == ARG_PTR_TO_UNINIT_MEM) 3812 count++; 3813 if (fn->arg4_type == ARG_PTR_TO_UNINIT_MEM) 3814 count++; 3815 if (fn->arg5_type == ARG_PTR_TO_UNINIT_MEM) 3816 count++; 3817 3818 /* We only support one arg being in raw mode at the moment, 3819 * which is sufficient for the helper functions we have 3820 * right now. 3821 */ 3822 return count <= 1; 3823 } 3824 3825 static bool check_args_pair_invalid(enum bpf_arg_type arg_curr, 3826 enum bpf_arg_type arg_next) 3827 { 3828 return (arg_type_is_mem_ptr(arg_curr) && 3829 !arg_type_is_mem_size(arg_next)) || 3830 (!arg_type_is_mem_ptr(arg_curr) && 3831 arg_type_is_mem_size(arg_next)); 3832 } 3833 3834 static bool check_arg_pair_ok(const struct bpf_func_proto *fn) 3835 { 3836 /* bpf_xxx(..., buf, len) call will access 'len' 3837 * bytes from memory 'buf'. Both arg types need 3838 * to be paired, so make sure there's no buggy 3839 * helper function specification. 
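 *
 * A conforming prototype pairs them; a sketch of a hypothetical helper
 * specification (not taken from this file) would be:
 *   .arg1_type = ARG_PTR_TO_UNINIT_MEM,
 *   .arg2_type = ARG_CONST_SIZE,
 * whereas a mem pointer with no size argument right after it, or a size
 * argument with no mem pointer right before it, makes this check fail.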
3840 */ 3841 if (arg_type_is_mem_size(fn->arg1_type) || 3842 arg_type_is_mem_ptr(fn->arg5_type) || 3843 check_args_pair_invalid(fn->arg1_type, fn->arg2_type) || 3844 check_args_pair_invalid(fn->arg2_type, fn->arg3_type) || 3845 check_args_pair_invalid(fn->arg3_type, fn->arg4_type) || 3846 check_args_pair_invalid(fn->arg4_type, fn->arg5_type)) 3847 return false; 3848 3849 return true; 3850 } 3851 3852 static bool check_refcount_ok(const struct bpf_func_proto *fn, int func_id) 3853 { 3854 int count = 0; 3855 3856 if (arg_type_may_be_refcounted(fn->arg1_type)) 3857 count++; 3858 if (arg_type_may_be_refcounted(fn->arg2_type)) 3859 count++; 3860 if (arg_type_may_be_refcounted(fn->arg3_type)) 3861 count++; 3862 if (arg_type_may_be_refcounted(fn->arg4_type)) 3863 count++; 3864 if (arg_type_may_be_refcounted(fn->arg5_type)) 3865 count++; 3866 3867 /* A reference acquiring function cannot acquire 3868 * another refcounted ptr. 3869 */ 3870 if (is_acquire_function(func_id) && count) 3871 return false; 3872 3873 /* We only support one arg being unreferenced at the moment, 3874 * which is sufficient for the helper functions we have right now. 3875 */ 3876 return count <= 1; 3877 } 3878 3879 static int check_func_proto(const struct bpf_func_proto *fn, int func_id) 3880 { 3881 return check_raw_mode_ok(fn) && 3882 check_arg_pair_ok(fn) && 3883 check_refcount_ok(fn, func_id) ? 0 : -EINVAL; 3884 } 3885 3886 /* Packet data might have moved, any old PTR_TO_PACKET[_META,_END] 3887 * are now invalid, so turn them into unknown SCALAR_VALUE. 3888 */ 3889 static void __clear_all_pkt_pointers(struct bpf_verifier_env *env, 3890 struct bpf_func_state *state) 3891 { 3892 struct bpf_reg_state *regs = state->regs, *reg; 3893 int i; 3894 3895 for (i = 0; i < MAX_BPF_REG; i++) 3896 if (reg_is_pkt_pointer_any(®s[i])) 3897 mark_reg_unknown(env, regs, i); 3898 3899 bpf_for_each_spilled_reg(i, state, reg) { 3900 if (!reg) 3901 continue; 3902 if (reg_is_pkt_pointer_any(reg)) 3903 __mark_reg_unknown(env, reg); 3904 } 3905 } 3906 3907 static void clear_all_pkt_pointers(struct bpf_verifier_env *env) 3908 { 3909 struct bpf_verifier_state *vstate = env->cur_state; 3910 int i; 3911 3912 for (i = 0; i <= vstate->curframe; i++) 3913 __clear_all_pkt_pointers(env, vstate->frame[i]); 3914 } 3915 3916 static void release_reg_references(struct bpf_verifier_env *env, 3917 struct bpf_func_state *state, 3918 int ref_obj_id) 3919 { 3920 struct bpf_reg_state *regs = state->regs, *reg; 3921 int i; 3922 3923 for (i = 0; i < MAX_BPF_REG; i++) 3924 if (regs[i].ref_obj_id == ref_obj_id) 3925 mark_reg_unknown(env, regs, i); 3926 3927 bpf_for_each_spilled_reg(i, state, reg) { 3928 if (!reg) 3929 continue; 3930 if (reg->ref_obj_id == ref_obj_id) 3931 __mark_reg_unknown(env, reg); 3932 } 3933 } 3934 3935 /* The pointer with the specified id has released its reference to kernel 3936 * resources. Identify all copies of the same pointer and clear the reference. 
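 *
 * For example (illustrative): after r6 = bpf_sk_lookup_tcp(...), a copy
 * such as r7 = r6 carries the same ref_obj_id. Once bpf_sk_release(r6)
 * is verified, both r6 and r7 are marked as unknown scalars here, so the
 * released socket can no longer be dereferenced through either register.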
3937 */ 3938 static int release_reference(struct bpf_verifier_env *env, 3939 int ref_obj_id) 3940 { 3941 struct bpf_verifier_state *vstate = env->cur_state; 3942 int err; 3943 int i; 3944 3945 err = release_reference_state(cur_func(env), ref_obj_id); 3946 if (err) 3947 return err; 3948 3949 for (i = 0; i <= vstate->curframe; i++) 3950 release_reg_references(env, vstate->frame[i], ref_obj_id); 3951 3952 return 0; 3953 } 3954 3955 static void clear_caller_saved_regs(struct bpf_verifier_env *env, 3956 struct bpf_reg_state *regs) 3957 { 3958 int i; 3959 3960 /* after the call registers r0 - r5 were scratched */ 3961 for (i = 0; i < CALLER_SAVED_REGS; i++) { 3962 mark_reg_not_init(env, regs, caller_saved[i]); 3963 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK); 3964 } 3965 } 3966 3967 static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, 3968 int *insn_idx) 3969 { 3970 struct bpf_verifier_state *state = env->cur_state; 3971 struct bpf_func_info_aux *func_info_aux; 3972 struct bpf_func_state *caller, *callee; 3973 int i, err, subprog, target_insn; 3974 bool is_global = false; 3975 3976 if (state->curframe + 1 >= MAX_CALL_FRAMES) { 3977 verbose(env, "the call stack of %d frames is too deep\n", 3978 state->curframe + 2); 3979 return -E2BIG; 3980 } 3981 3982 target_insn = *insn_idx + insn->imm; 3983 subprog = find_subprog(env, target_insn + 1); 3984 if (subprog < 0) { 3985 verbose(env, "verifier bug. No program starts at insn %d\n", 3986 target_insn + 1); 3987 return -EFAULT; 3988 } 3989 3990 caller = state->frame[state->curframe]; 3991 if (state->frame[state->curframe + 1]) { 3992 verbose(env, "verifier bug. Frame %d already allocated\n", 3993 state->curframe + 1); 3994 return -EFAULT; 3995 } 3996 3997 func_info_aux = env->prog->aux->func_info_aux; 3998 if (func_info_aux) 3999 is_global = func_info_aux[subprog].linkage == BTF_FUNC_GLOBAL; 4000 err = btf_check_func_arg_match(env, subprog, caller->regs); 4001 if (err == -EFAULT) 4002 return err; 4003 if (is_global) { 4004 if (err) { 4005 verbose(env, "Caller passes invalid args into func#%d\n", 4006 subprog); 4007 return err; 4008 } else { 4009 if (env->log.level & BPF_LOG_LEVEL) 4010 verbose(env, 4011 "Func#%d is global and valid. Skipping.\n", 4012 subprog); 4013 clear_caller_saved_regs(env, caller->regs); 4014 4015 /* All global functions return SCALAR_VALUE */ 4016 mark_reg_unknown(env, caller->regs, BPF_REG_0); 4017 4018 /* continue with next insn after call */ 4019 return 0; 4020 } 4021 } 4022 4023 callee = kzalloc(sizeof(*callee), GFP_KERNEL); 4024 if (!callee) 4025 return -ENOMEM; 4026 state->frame[state->curframe + 1] = callee; 4027 4028 /* callee cannot access r0, r6 - r9 for reading and has to write 4029 * into its own stack before reading from it. 4030 * callee can read/write into caller's stack 4031 */ 4032 init_func_state(env, callee, 4033 /* remember the callsite, it will be used by bpf_exit */ 4034 *insn_idx /* callsite */, 4035 state->curframe + 1 /* frameno within this callchain */, 4036 subprog /* subprog number within this prog */); 4037 4038 /* Transfer references to the callee */ 4039 err = transfer_reference_state(callee, caller); 4040 if (err) 4041 return err; 4042 4043 /* copy r1 - r5 args that callee can access. 
The copy includes parent 4044 * pointers, which connects us up to the liveness chain 4045 */ 4046 for (i = BPF_REG_1; i <= BPF_REG_5; i++) 4047 callee->regs[i] = caller->regs[i]; 4048 4049 clear_caller_saved_regs(env, caller->regs); 4050 4051 /* only increment it after check_reg_arg() finished */ 4052 state->curframe++; 4053 4054 /* and go analyze first insn of the callee */ 4055 *insn_idx = target_insn; 4056 4057 if (env->log.level & BPF_LOG_LEVEL) { 4058 verbose(env, "caller:\n"); 4059 print_verifier_state(env, caller); 4060 verbose(env, "callee:\n"); 4061 print_verifier_state(env, callee); 4062 } 4063 return 0; 4064 } 4065 4066 static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) 4067 { 4068 struct bpf_verifier_state *state = env->cur_state; 4069 struct bpf_func_state *caller, *callee; 4070 struct bpf_reg_state *r0; 4071 int err; 4072 4073 callee = state->frame[state->curframe]; 4074 r0 = &callee->regs[BPF_REG_0]; 4075 if (r0->type == PTR_TO_STACK) { 4076 /* technically it's ok to return caller's stack pointer 4077 * (or caller's caller's pointer) back to the caller, 4078 * since these pointers are valid. Only current stack 4079 * pointer will be invalid as soon as function exits, 4080 * but let's be conservative 4081 */ 4082 verbose(env, "cannot return stack pointer to the caller\n"); 4083 return -EINVAL; 4084 } 4085 4086 state->curframe--; 4087 caller = state->frame[state->curframe]; 4088 /* return to the caller whatever r0 had in the callee */ 4089 caller->regs[BPF_REG_0] = *r0; 4090 4091 /* Transfer references to the caller */ 4092 err = transfer_reference_state(caller, callee); 4093 if (err) 4094 return err; 4095 4096 *insn_idx = callee->callsite + 1; 4097 if (env->log.level & BPF_LOG_LEVEL) { 4098 verbose(env, "returning from callee:\n"); 4099 print_verifier_state(env, callee); 4100 verbose(env, "to caller at %d:\n", *insn_idx); 4101 print_verifier_state(env, caller); 4102 } 4103 /* clear everything in the callee */ 4104 free_func_state(callee); 4105 state->frame[state->curframe + 1] = NULL; 4106 return 0; 4107 } 4108 4109 static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type, 4110 int func_id, 4111 struct bpf_call_arg_meta *meta) 4112 { 4113 struct bpf_reg_state *ret_reg = ®s[BPF_REG_0]; 4114 4115 if (ret_type != RET_INTEGER || 4116 (func_id != BPF_FUNC_get_stack && 4117 func_id != BPF_FUNC_probe_read_str)) 4118 return; 4119 4120 ret_reg->smax_value = meta->msize_smax_value; 4121 ret_reg->umax_value = meta->msize_umax_value; 4122 __reg_deduce_bounds(ret_reg); 4123 __reg_bound_offset(ret_reg); 4124 } 4125 4126 static int 4127 record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta, 4128 int func_id, int insn_idx) 4129 { 4130 struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx]; 4131 struct bpf_map *map = meta->map_ptr; 4132 4133 if (func_id != BPF_FUNC_tail_call && 4134 func_id != BPF_FUNC_map_lookup_elem && 4135 func_id != BPF_FUNC_map_update_elem && 4136 func_id != BPF_FUNC_map_delete_elem && 4137 func_id != BPF_FUNC_map_push_elem && 4138 func_id != BPF_FUNC_map_pop_elem && 4139 func_id != BPF_FUNC_map_peek_elem) 4140 return 0; 4141 4142 if (map == NULL) { 4143 verbose(env, "kernel subsystem misconfigured verifier\n"); 4144 return -EINVAL; 4145 } 4146 4147 /* In case of read-only, some additional restrictions 4148 * need to be applied in order to prevent altering the 4149 * state of the map from program side. 
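 *
 * For example (sketch): on a map created with BPF_F_RDONLY_PROG in its
 * map_flags, bpf_map_lookup_elem() remains usable from the program, but
 * bpf_map_update_elem(), bpf_map_delete_elem(), bpf_map_push_elem() and
 * bpf_map_pop_elem() are refused below with "write into map forbidden".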
4150 */ 4151 if ((map->map_flags & BPF_F_RDONLY_PROG) && 4152 (func_id == BPF_FUNC_map_delete_elem || 4153 func_id == BPF_FUNC_map_update_elem || 4154 func_id == BPF_FUNC_map_push_elem || 4155 func_id == BPF_FUNC_map_pop_elem)) { 4156 verbose(env, "write into map forbidden\n"); 4157 return -EACCES; 4158 } 4159 4160 if (!BPF_MAP_PTR(aux->map_ptr_state)) 4161 bpf_map_ptr_store(aux, meta->map_ptr, 4162 meta->map_ptr->unpriv_array); 4163 else if (BPF_MAP_PTR(aux->map_ptr_state) != meta->map_ptr) 4164 bpf_map_ptr_store(aux, BPF_MAP_PTR_POISON, 4165 meta->map_ptr->unpriv_array); 4166 return 0; 4167 } 4168 4169 static int 4170 record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta, 4171 int func_id, int insn_idx) 4172 { 4173 struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx]; 4174 struct bpf_reg_state *regs = cur_regs(env), *reg; 4175 struct bpf_map *map = meta->map_ptr; 4176 struct tnum range; 4177 u64 val; 4178 int err; 4179 4180 if (func_id != BPF_FUNC_tail_call) 4181 return 0; 4182 if (!map || map->map_type != BPF_MAP_TYPE_PROG_ARRAY) { 4183 verbose(env, "kernel subsystem misconfigured verifier\n"); 4184 return -EINVAL; 4185 } 4186 4187 range = tnum_range(0, map->max_entries - 1); 4188 reg = ®s[BPF_REG_3]; 4189 4190 if (!register_is_const(reg) || !tnum_in(range, reg->var_off)) { 4191 bpf_map_key_store(aux, BPF_MAP_KEY_POISON); 4192 return 0; 4193 } 4194 4195 err = mark_chain_precision(env, BPF_REG_3); 4196 if (err) 4197 return err; 4198 4199 val = reg->var_off.value; 4200 if (bpf_map_key_unseen(aux)) 4201 bpf_map_key_store(aux, val); 4202 else if (!bpf_map_key_poisoned(aux) && 4203 bpf_map_key_immediate(aux) != val) 4204 bpf_map_key_store(aux, BPF_MAP_KEY_POISON); 4205 return 0; 4206 } 4207 4208 static int check_reference_leak(struct bpf_verifier_env *env) 4209 { 4210 struct bpf_func_state *state = cur_func(env); 4211 int i; 4212 4213 for (i = 0; i < state->acquired_refs; i++) { 4214 verbose(env, "Unreleased reference id=%d alloc_insn=%d\n", 4215 state->refs[i].id, state->refs[i].insn_idx); 4216 } 4217 return state->acquired_refs ? -EINVAL : 0; 4218 } 4219 4220 static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn_idx) 4221 { 4222 const struct bpf_func_proto *fn = NULL; 4223 struct bpf_reg_state *regs; 4224 struct bpf_call_arg_meta meta; 4225 bool changes_data; 4226 int i, err; 4227 4228 /* find function prototype */ 4229 if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) { 4230 verbose(env, "invalid func %s#%d\n", func_id_name(func_id), 4231 func_id); 4232 return -EINVAL; 4233 } 4234 4235 if (env->ops->get_func_proto) 4236 fn = env->ops->get_func_proto(func_id, env->prog); 4237 if (!fn) { 4238 verbose(env, "unknown func %s#%d\n", func_id_name(func_id), 4239 func_id); 4240 return -EINVAL; 4241 } 4242 4243 /* eBPF programs must be GPL compatible to use GPL-ed functions */ 4244 if (!env->prog->gpl_compatible && fn->gpl_only) { 4245 verbose(env, "cannot call GPL-restricted function from non-GPL compatible program\n"); 4246 return -EINVAL; 4247 } 4248 4249 /* With LD_ABS/IND some JITs save/restore skb from r1. 
*/ 4250 changes_data = bpf_helper_changes_pkt_data(fn->func); 4251 if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) { 4252 verbose(env, "kernel subsystem misconfigured func %s#%d: r1 != ctx\n", 4253 func_id_name(func_id), func_id); 4254 return -EINVAL; 4255 } 4256 4257 memset(&meta, 0, sizeof(meta)); 4258 meta.pkt_access = fn->pkt_access; 4259 4260 err = check_func_proto(fn, func_id); 4261 if (err) { 4262 verbose(env, "kernel subsystem misconfigured func %s#%d\n", 4263 func_id_name(func_id), func_id); 4264 return err; 4265 } 4266 4267 meta.func_id = func_id; 4268 /* check args */ 4269 for (i = 0; i < 5; i++) { 4270 err = btf_resolve_helper_id(&env->log, fn, i); 4271 if (err > 0) 4272 meta.btf_id = err; 4273 err = check_func_arg(env, BPF_REG_1 + i, fn->arg_type[i], &meta); 4274 if (err) 4275 return err; 4276 } 4277 4278 err = record_func_map(env, &meta, func_id, insn_idx); 4279 if (err) 4280 return err; 4281 4282 err = record_func_key(env, &meta, func_id, insn_idx); 4283 if (err) 4284 return err; 4285 4286 /* Mark slots with STACK_MISC in case of raw mode, stack offset 4287 * is inferred from register state. 4288 */ 4289 for (i = 0; i < meta.access_size; i++) { 4290 err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B, 4291 BPF_WRITE, -1, false); 4292 if (err) 4293 return err; 4294 } 4295 4296 if (func_id == BPF_FUNC_tail_call) { 4297 err = check_reference_leak(env); 4298 if (err) { 4299 verbose(env, "tail_call would lead to reference leak\n"); 4300 return err; 4301 } 4302 } else if (is_release_function(func_id)) { 4303 err = release_reference(env, meta.ref_obj_id); 4304 if (err) { 4305 verbose(env, "func %s#%d reference has not been acquired before\n", 4306 func_id_name(func_id), func_id); 4307 return err; 4308 } 4309 } 4310 4311 regs = cur_regs(env); 4312 4313 /* check that flags argument in get_local_storage(map, flags) is 0, 4314 * this is required because get_local_storage() can't return an error. 4315 */ 4316 if (func_id == BPF_FUNC_get_local_storage && 4317 !register_is_null(®s[BPF_REG_2])) { 4318 verbose(env, "get_local_storage() doesn't support non-zero flags\n"); 4319 return -EINVAL; 4320 } 4321 4322 /* reset caller saved regs */ 4323 for (i = 0; i < CALLER_SAVED_REGS; i++) { 4324 mark_reg_not_init(env, regs, caller_saved[i]); 4325 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK); 4326 } 4327 4328 /* helper call returns 64-bit value. 
*/ 4329 regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG; 4330 4331 /* update return register (already marked as written above) */ 4332 if (fn->ret_type == RET_INTEGER) { 4333 /* sets type to SCALAR_VALUE */ 4334 mark_reg_unknown(env, regs, BPF_REG_0); 4335 } else if (fn->ret_type == RET_VOID) { 4336 regs[BPF_REG_0].type = NOT_INIT; 4337 } else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL || 4338 fn->ret_type == RET_PTR_TO_MAP_VALUE) { 4339 /* There is no offset yet applied, variable or fixed */ 4340 mark_reg_known_zero(env, regs, BPF_REG_0); 4341 /* remember map_ptr, so that check_map_access() 4342 * can check 'value_size' boundary of memory access 4343 * to map element returned from bpf_map_lookup_elem() 4344 */ 4345 if (meta.map_ptr == NULL) { 4346 verbose(env, 4347 "kernel subsystem misconfigured verifier\n"); 4348 return -EINVAL; 4349 } 4350 regs[BPF_REG_0].map_ptr = meta.map_ptr; 4351 if (fn->ret_type == RET_PTR_TO_MAP_VALUE) { 4352 regs[BPF_REG_0].type = PTR_TO_MAP_VALUE; 4353 if (map_value_has_spin_lock(meta.map_ptr)) 4354 regs[BPF_REG_0].id = ++env->id_gen; 4355 } else { 4356 regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL; 4357 regs[BPF_REG_0].id = ++env->id_gen; 4358 } 4359 } else if (fn->ret_type == RET_PTR_TO_SOCKET_OR_NULL) { 4360 mark_reg_known_zero(env, regs, BPF_REG_0); 4361 regs[BPF_REG_0].type = PTR_TO_SOCKET_OR_NULL; 4362 regs[BPF_REG_0].id = ++env->id_gen; 4363 } else if (fn->ret_type == RET_PTR_TO_SOCK_COMMON_OR_NULL) { 4364 mark_reg_known_zero(env, regs, BPF_REG_0); 4365 regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON_OR_NULL; 4366 regs[BPF_REG_0].id = ++env->id_gen; 4367 } else if (fn->ret_type == RET_PTR_TO_TCP_SOCK_OR_NULL) { 4368 mark_reg_known_zero(env, regs, BPF_REG_0); 4369 regs[BPF_REG_0].type = PTR_TO_TCP_SOCK_OR_NULL; 4370 regs[BPF_REG_0].id = ++env->id_gen; 4371 } else { 4372 verbose(env, "unknown return type %d of func %s#%d\n", 4373 fn->ret_type, func_id_name(func_id), func_id); 4374 return -EINVAL; 4375 } 4376 4377 if (is_ptr_cast_function(func_id)) { 4378 /* For release_reference() */ 4379 regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id; 4380 } else if (is_acquire_function(func_id)) { 4381 int id = acquire_reference_state(env, insn_idx); 4382 4383 if (id < 0) 4384 return id; 4385 /* For mark_ptr_or_null_reg() */ 4386 regs[BPF_REG_0].id = id; 4387 /* For release_reference() */ 4388 regs[BPF_REG_0].ref_obj_id = id; 4389 } 4390 4391 do_refine_retval_range(regs, fn->ret_type, func_id, &meta); 4392 4393 err = check_map_func_compatibility(env, meta.map_ptr, func_id); 4394 if (err) 4395 return err; 4396 4397 if (func_id == BPF_FUNC_get_stack && !env->prog->has_callchain_buf) { 4398 const char *err_str; 4399 4400 #ifdef CONFIG_PERF_EVENTS 4401 err = get_callchain_buffers(sysctl_perf_event_max_stack); 4402 err_str = "cannot get callchain buffer for func %s#%d\n"; 4403 #else 4404 err = -ENOTSUPP; 4405 err_str = "func %s#%d not supported without CONFIG_PERF_EVENTS\n"; 4406 #endif 4407 if (err) { 4408 verbose(env, err_str, func_id_name(func_id), func_id); 4409 return err; 4410 } 4411 4412 env->prog->has_callchain_buf = true; 4413 } 4414 4415 if (changes_data) 4416 clear_all_pkt_pointers(env); 4417 return 0; 4418 } 4419 4420 static bool signed_add_overflows(s64 a, s64 b) 4421 { 4422 /* Do the add in u64, where overflow is well-defined */ 4423 s64 res = (s64)((u64)a + (u64)b); 4424 4425 if (b < 0) 4426 return res > a; 4427 return res < a; 4428 } 4429 4430 static bool signed_sub_overflows(s64 a, s64 b) 4431 { 4432 /* Do the sub in u64, where overflow is well-defined */ 4433 
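/* Worked example (comment only, assuming two's complement wrap in u64):
 * a = S64_MIN, b = 1 gives res == S64_MAX after the wrap; since b > 0
 * the check below reports overflow because res > a.
 */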
s64 res = (s64)((u64)a - (u64)b); 4434 4435 if (b < 0) 4436 return res < a; 4437 return res > a; 4438 } 4439 4440 static bool check_reg_sane_offset(struct bpf_verifier_env *env, 4441 const struct bpf_reg_state *reg, 4442 enum bpf_reg_type type) 4443 { 4444 bool known = tnum_is_const(reg->var_off); 4445 s64 val = reg->var_off.value; 4446 s64 smin = reg->smin_value; 4447 4448 if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) { 4449 verbose(env, "math between %s pointer and %lld is not allowed\n", 4450 reg_type_str[type], val); 4451 return false; 4452 } 4453 4454 if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) { 4455 verbose(env, "%s pointer offset %d is not allowed\n", 4456 reg_type_str[type], reg->off); 4457 return false; 4458 } 4459 4460 if (smin == S64_MIN) { 4461 verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n", 4462 reg_type_str[type]); 4463 return false; 4464 } 4465 4466 if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) { 4467 verbose(env, "value %lld makes %s pointer be out of bounds\n", 4468 smin, reg_type_str[type]); 4469 return false; 4470 } 4471 4472 return true; 4473 } 4474 4475 static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env) 4476 { 4477 return &env->insn_aux_data[env->insn_idx]; 4478 } 4479 4480 static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg, 4481 u32 *ptr_limit, u8 opcode, bool off_is_neg) 4482 { 4483 bool mask_to_left = (opcode == BPF_ADD && off_is_neg) || 4484 (opcode == BPF_SUB && !off_is_neg); 4485 u32 off; 4486 4487 switch (ptr_reg->type) { 4488 case PTR_TO_STACK: 4489 /* Indirect variable offset stack access is prohibited in 4490 * unprivileged mode so it's not handled here. 4491 */ 4492 off = ptr_reg->off + ptr_reg->var_off.value; 4493 if (mask_to_left) 4494 *ptr_limit = MAX_BPF_STACK + off; 4495 else 4496 *ptr_limit = -off; 4497 return 0; 4498 case PTR_TO_MAP_VALUE: 4499 if (mask_to_left) { 4500 *ptr_limit = ptr_reg->umax_value + ptr_reg->off; 4501 } else { 4502 off = ptr_reg->smin_value + ptr_reg->off; 4503 *ptr_limit = ptr_reg->map_ptr->value_size - off; 4504 } 4505 return 0; 4506 default: 4507 return -EINVAL; 4508 } 4509 } 4510 4511 static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env, 4512 const struct bpf_insn *insn) 4513 { 4514 return env->allow_ptr_leaks || BPF_SRC(insn->code) == BPF_K; 4515 } 4516 4517 static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux, 4518 u32 alu_state, u32 alu_limit) 4519 { 4520 /* If we arrived here from different branches with different 4521 * state or limits to sanitize, then this won't work. 4522 */ 4523 if (aux->alu_state && 4524 (aux->alu_state != alu_state || 4525 aux->alu_limit != alu_limit)) 4526 return -EACCES; 4527 4528 /* Corresponding fixup done in fixup_bpf_calls(). 
*/ 4529 aux->alu_state = alu_state; 4530 aux->alu_limit = alu_limit; 4531 return 0; 4532 } 4533 4534 static int sanitize_val_alu(struct bpf_verifier_env *env, 4535 struct bpf_insn *insn) 4536 { 4537 struct bpf_insn_aux_data *aux = cur_aux(env); 4538 4539 if (can_skip_alu_sanitation(env, insn)) 4540 return 0; 4541 4542 return update_alu_sanitation_state(aux, BPF_ALU_NON_POINTER, 0); 4543 } 4544 4545 static int sanitize_ptr_alu(struct bpf_verifier_env *env, 4546 struct bpf_insn *insn, 4547 const struct bpf_reg_state *ptr_reg, 4548 struct bpf_reg_state *dst_reg, 4549 bool off_is_neg) 4550 { 4551 struct bpf_verifier_state *vstate = env->cur_state; 4552 struct bpf_insn_aux_data *aux = cur_aux(env); 4553 bool ptr_is_dst_reg = ptr_reg == dst_reg; 4554 u8 opcode = BPF_OP(insn->code); 4555 u32 alu_state, alu_limit; 4556 struct bpf_reg_state tmp; 4557 bool ret; 4558 4559 if (can_skip_alu_sanitation(env, insn)) 4560 return 0; 4561 4562 /* We already marked aux for masking from non-speculative 4563 * paths, thus we got here in the first place. We only care 4564 * to explore bad access from here. 4565 */ 4566 if (vstate->speculative) 4567 goto do_sim; 4568 4569 alu_state = off_is_neg ? BPF_ALU_NEG_VALUE : 0; 4570 alu_state |= ptr_is_dst_reg ? 4571 BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST; 4572 4573 if (retrieve_ptr_limit(ptr_reg, &alu_limit, opcode, off_is_neg)) 4574 return 0; 4575 if (update_alu_sanitation_state(aux, alu_state, alu_limit)) 4576 return -EACCES; 4577 do_sim: 4578 /* Simulate and find potential out-of-bounds access under 4579 * speculative execution from truncation as a result of 4580 * masking when off was not within expected range. If off 4581 * sits in dst, then we temporarily need to move ptr there 4582 * to simulate dst (== 0) +/-= ptr. Needed, for example, 4583 * for cases where we use K-based arithmetic in one direction 4584 * and truncated reg-based in the other in order to explore 4585 * bad access. 4586 */ 4587 if (!ptr_is_dst_reg) { 4588 tmp = *dst_reg; 4589 *dst_reg = *ptr_reg; 4590 } 4591 ret = push_stack(env, env->insn_idx + 1, env->insn_idx, true); 4592 if (!ptr_is_dst_reg && ret) 4593 *dst_reg = tmp; 4594 return !ret ? -EFAULT : 0; 4595 } 4596 4597 /* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off. 4598 * Caller should also handle BPF_MOV case separately. 4599 * If we return -EACCES, caller may want to try again treating pointer as a 4600 * scalar. So we only emit a diagnostic if !env->allow_ptr_leaks. 4601 */ 4602 static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, 4603 struct bpf_insn *insn, 4604 const struct bpf_reg_state *ptr_reg, 4605 const struct bpf_reg_state *off_reg) 4606 { 4607 struct bpf_verifier_state *vstate = env->cur_state; 4608 struct bpf_func_state *state = vstate->frame[vstate->curframe]; 4609 struct bpf_reg_state *regs = state->regs, *dst_reg; 4610 bool known = tnum_is_const(off_reg->var_off); 4611 s64 smin_val = off_reg->smin_value, smax_val = off_reg->smax_value, 4612 smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value; 4613 u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value, 4614 umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value; 4615 u32 dst = insn->dst_reg, src = insn->src_reg; 4616 u8 opcode = BPF_OP(insn->code); 4617 int ret; 4618 4619 dst_reg = ®s[dst]; 4620 4621 if ((known && (smin_val != smax_val || umin_val != umax_val)) || 4622 smin_val > smax_val || umin_val > umax_val) { 4623 /* Taint dst register if offset had invalid bounds derived from 4624 * e.g. 
dead branches. 4625 */ 4626 __mark_reg_unknown(env, dst_reg); 4627 return 0; 4628 } 4629 4630 if (BPF_CLASS(insn->code) != BPF_ALU64) { 4631 /* 32-bit ALU ops on pointers produce (meaningless) scalars */ 4632 verbose(env, 4633 "R%d 32-bit pointer arithmetic prohibited\n", 4634 dst); 4635 return -EACCES; 4636 } 4637 4638 switch (ptr_reg->type) { 4639 case PTR_TO_MAP_VALUE_OR_NULL: 4640 verbose(env, "R%d pointer arithmetic on %s prohibited, null-check it first\n", 4641 dst, reg_type_str[ptr_reg->type]); 4642 return -EACCES; 4643 case CONST_PTR_TO_MAP: 4644 case PTR_TO_PACKET_END: 4645 case PTR_TO_SOCKET: 4646 case PTR_TO_SOCKET_OR_NULL: 4647 case PTR_TO_SOCK_COMMON: 4648 case PTR_TO_SOCK_COMMON_OR_NULL: 4649 case PTR_TO_TCP_SOCK: 4650 case PTR_TO_TCP_SOCK_OR_NULL: 4651 case PTR_TO_XDP_SOCK: 4652 verbose(env, "R%d pointer arithmetic on %s prohibited\n", 4653 dst, reg_type_str[ptr_reg->type]); 4654 return -EACCES; 4655 case PTR_TO_MAP_VALUE: 4656 if (!env->allow_ptr_leaks && !known && (smin_val < 0) != (smax_val < 0)) { 4657 verbose(env, "R%d has unknown scalar with mixed signed bounds, pointer arithmetic with it prohibited for !root\n", 4658 off_reg == dst_reg ? dst : src); 4659 return -EACCES; 4660 } 4661 /* fall-through */ 4662 default: 4663 break; 4664 } 4665 4666 /* In case of 'scalar += pointer', dst_reg inherits pointer type and id. 4667 * The id may be overwritten later if we create a new variable offset. 4668 */ 4669 dst_reg->type = ptr_reg->type; 4670 dst_reg->id = ptr_reg->id; 4671 4672 if (!check_reg_sane_offset(env, off_reg, ptr_reg->type) || 4673 !check_reg_sane_offset(env, ptr_reg, ptr_reg->type)) 4674 return -EINVAL; 4675 4676 switch (opcode) { 4677 case BPF_ADD: 4678 ret = sanitize_ptr_alu(env, insn, ptr_reg, dst_reg, smin_val < 0); 4679 if (ret < 0) { 4680 verbose(env, "R%d tried to add from different maps or paths\n", dst); 4681 return ret; 4682 } 4683 /* We can take a fixed offset as long as it doesn't overflow 4684 * the s32 'off' field 4685 */ 4686 if (known && (ptr_reg->off + smin_val == 4687 (s64)(s32)(ptr_reg->off + smin_val))) { 4688 /* pointer += K. Accumulate it into fixed offset */ 4689 dst_reg->smin_value = smin_ptr; 4690 dst_reg->smax_value = smax_ptr; 4691 dst_reg->umin_value = umin_ptr; 4692 dst_reg->umax_value = umax_ptr; 4693 dst_reg->var_off = ptr_reg->var_off; 4694 dst_reg->off = ptr_reg->off + smin_val; 4695 dst_reg->raw = ptr_reg->raw; 4696 break; 4697 } 4698 /* A new variable offset is created. Note that off_reg->off 4699 * == 0, since it's a scalar. 4700 * dst_reg gets the pointer type and since some positive 4701 * integer value was added to the pointer, give it a new 'id' 4702 * if it's a PTR_TO_PACKET. 4703 * this creates a new 'base' pointer, off_reg (variable) gets 4704 * added into the variable offset, and we copy the fixed offset 4705 * from ptr_reg. 
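 *
 * A sketch (not from this file): r2 is a PTR_TO_MAP_VALUE with off 0 and
 * r3 is a scalar known to lie in [0, 16]. After r2 += r3 the code below
 * leaves dst_reg->off at 0, sets umin_value/umax_value to 0/16, and only
 * hands out a fresh id when the base was a packet pointer.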
4706 */ 4707 if (signed_add_overflows(smin_ptr, smin_val) || 4708 signed_add_overflows(smax_ptr, smax_val)) { 4709 dst_reg->smin_value = S64_MIN; 4710 dst_reg->smax_value = S64_MAX; 4711 } else { 4712 dst_reg->smin_value = smin_ptr + smin_val; 4713 dst_reg->smax_value = smax_ptr + smax_val; 4714 } 4715 if (umin_ptr + umin_val < umin_ptr || 4716 umax_ptr + umax_val < umax_ptr) { 4717 dst_reg->umin_value = 0; 4718 dst_reg->umax_value = U64_MAX; 4719 } else { 4720 dst_reg->umin_value = umin_ptr + umin_val; 4721 dst_reg->umax_value = umax_ptr + umax_val; 4722 } 4723 dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off); 4724 dst_reg->off = ptr_reg->off; 4725 dst_reg->raw = ptr_reg->raw; 4726 if (reg_is_pkt_pointer(ptr_reg)) { 4727 dst_reg->id = ++env->id_gen; 4728 /* something was added to pkt_ptr, set range to zero */ 4729 dst_reg->raw = 0; 4730 } 4731 break; 4732 case BPF_SUB: 4733 ret = sanitize_ptr_alu(env, insn, ptr_reg, dst_reg, smin_val < 0); 4734 if (ret < 0) { 4735 verbose(env, "R%d tried to sub from different maps or paths\n", dst); 4736 return ret; 4737 } 4738 if (dst_reg == off_reg) { 4739 /* scalar -= pointer. Creates an unknown scalar */ 4740 verbose(env, "R%d tried to subtract pointer from scalar\n", 4741 dst); 4742 return -EACCES; 4743 } 4744 /* We don't allow subtraction from FP, because (according to 4745 * test_verifier.c test "invalid fp arithmetic", JITs might not 4746 * be able to deal with it. 4747 */ 4748 if (ptr_reg->type == PTR_TO_STACK) { 4749 verbose(env, "R%d subtraction from stack pointer prohibited\n", 4750 dst); 4751 return -EACCES; 4752 } 4753 if (known && (ptr_reg->off - smin_val == 4754 (s64)(s32)(ptr_reg->off - smin_val))) { 4755 /* pointer -= K. Subtract it from fixed offset */ 4756 dst_reg->smin_value = smin_ptr; 4757 dst_reg->smax_value = smax_ptr; 4758 dst_reg->umin_value = umin_ptr; 4759 dst_reg->umax_value = umax_ptr; 4760 dst_reg->var_off = ptr_reg->var_off; 4761 dst_reg->id = ptr_reg->id; 4762 dst_reg->off = ptr_reg->off - smin_val; 4763 dst_reg->raw = ptr_reg->raw; 4764 break; 4765 } 4766 /* A new variable offset is created. If the subtrahend is known 4767 * nonnegative, then any reg->range we had before is still good. 4768 */ 4769 if (signed_sub_overflows(smin_ptr, smax_val) || 4770 signed_sub_overflows(smax_ptr, smin_val)) { 4771 /* Overflow possible, we know nothing */ 4772 dst_reg->smin_value = S64_MIN; 4773 dst_reg->smax_value = S64_MAX; 4774 } else { 4775 dst_reg->smin_value = smin_ptr - smax_val; 4776 dst_reg->smax_value = smax_ptr - smin_val; 4777 } 4778 if (umin_ptr < umax_val) { 4779 /* Overflow possible, we know nothing */ 4780 dst_reg->umin_value = 0; 4781 dst_reg->umax_value = U64_MAX; 4782 } else { 4783 /* Cannot overflow (as long as bounds are consistent) */ 4784 dst_reg->umin_value = umin_ptr - umax_val; 4785 dst_reg->umax_value = umax_ptr - umin_val; 4786 } 4787 dst_reg->var_off = tnum_sub(ptr_reg->var_off, off_reg->var_off); 4788 dst_reg->off = ptr_reg->off; 4789 dst_reg->raw = ptr_reg->raw; 4790 if (reg_is_pkt_pointer(ptr_reg)) { 4791 dst_reg->id = ++env->id_gen; 4792 /* something was added to pkt_ptr, set range to zero */ 4793 if (smin_val < 0) 4794 dst_reg->raw = 0; 4795 } 4796 break; 4797 case BPF_AND: 4798 case BPF_OR: 4799 case BPF_XOR: 4800 /* bitwise ops on pointers are troublesome, prohibit. */ 4801 verbose(env, "R%d bitwise operator %s on pointer prohibited\n", 4802 dst, bpf_alu_string[opcode >> 4]); 4803 return -EACCES; 4804 default: 4805 /* other operators (e.g. 
MUL,LSH) produce non-pointer results */ 4806 verbose(env, "R%d pointer arithmetic with %s operator prohibited\n", 4807 dst, bpf_alu_string[opcode >> 4]); 4808 return -EACCES; 4809 } 4810 4811 if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type)) 4812 return -EINVAL; 4813 4814 __update_reg_bounds(dst_reg); 4815 __reg_deduce_bounds(dst_reg); 4816 __reg_bound_offset(dst_reg); 4817 4818 /* For unprivileged we require that resulting offset must be in bounds 4819 * in order to be able to sanitize access later on. 4820 */ 4821 if (!env->allow_ptr_leaks) { 4822 if (dst_reg->type == PTR_TO_MAP_VALUE && 4823 check_map_access(env, dst, dst_reg->off, 1, false)) { 4824 verbose(env, "R%d pointer arithmetic of map value goes out of range, " 4825 "prohibited for !root\n", dst); 4826 return -EACCES; 4827 } else if (dst_reg->type == PTR_TO_STACK && 4828 check_stack_access(env, dst_reg, dst_reg->off + 4829 dst_reg->var_off.value, 1)) { 4830 verbose(env, "R%d stack pointer arithmetic goes out of range, " 4831 "prohibited for !root\n", dst); 4832 return -EACCES; 4833 } 4834 } 4835 4836 return 0; 4837 } 4838 4839 /* WARNING: This function does calculations on 64-bit values, but the actual 4840 * execution may occur on 32-bit values. Therefore, things like bitshifts 4841 * need extra checks in the 32-bit case. 4842 */ 4843 static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, 4844 struct bpf_insn *insn, 4845 struct bpf_reg_state *dst_reg, 4846 struct bpf_reg_state src_reg) 4847 { 4848 struct bpf_reg_state *regs = cur_regs(env); 4849 u8 opcode = BPF_OP(insn->code); 4850 bool src_known, dst_known; 4851 s64 smin_val, smax_val; 4852 u64 umin_val, umax_val; 4853 u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32; 4854 u32 dst = insn->dst_reg; 4855 int ret; 4856 4857 if (insn_bitness == 32) { 4858 /* Relevant for 32-bit RSH: Information can propagate towards 4859 * LSB, so it isn't sufficient to only truncate the output to 4860 * 32 bits. 4861 */ 4862 coerce_reg_to_size(dst_reg, 4); 4863 coerce_reg_to_size(&src_reg, 4); 4864 } 4865 4866 smin_val = src_reg.smin_value; 4867 smax_val = src_reg.smax_value; 4868 umin_val = src_reg.umin_value; 4869 umax_val = src_reg.umax_value; 4870 src_known = tnum_is_const(src_reg.var_off); 4871 dst_known = tnum_is_const(dst_reg->var_off); 4872 4873 if ((src_known && (smin_val != smax_val || umin_val != umax_val)) || 4874 smin_val > smax_val || umin_val > umax_val) { 4875 /* Taint dst register if offset had invalid bounds derived from 4876 * e.g. dead branches. 
4877 */ 4878 __mark_reg_unknown(env, dst_reg); 4879 return 0; 4880 } 4881 4882 if (!src_known && 4883 opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) { 4884 __mark_reg_unknown(env, dst_reg); 4885 return 0; 4886 } 4887 4888 switch (opcode) { 4889 case BPF_ADD: 4890 ret = sanitize_val_alu(env, insn); 4891 if (ret < 0) { 4892 verbose(env, "R%d tried to add from different pointers or scalars\n", dst); 4893 return ret; 4894 } 4895 if (signed_add_overflows(dst_reg->smin_value, smin_val) || 4896 signed_add_overflows(dst_reg->smax_value, smax_val)) { 4897 dst_reg->smin_value = S64_MIN; 4898 dst_reg->smax_value = S64_MAX; 4899 } else { 4900 dst_reg->smin_value += smin_val; 4901 dst_reg->smax_value += smax_val; 4902 } 4903 if (dst_reg->umin_value + umin_val < umin_val || 4904 dst_reg->umax_value + umax_val < umax_val) { 4905 dst_reg->umin_value = 0; 4906 dst_reg->umax_value = U64_MAX; 4907 } else { 4908 dst_reg->umin_value += umin_val; 4909 dst_reg->umax_value += umax_val; 4910 } 4911 dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off); 4912 break; 4913 case BPF_SUB: 4914 ret = sanitize_val_alu(env, insn); 4915 if (ret < 0) { 4916 verbose(env, "R%d tried to sub from different pointers or scalars\n", dst); 4917 return ret; 4918 } 4919 if (signed_sub_overflows(dst_reg->smin_value, smax_val) || 4920 signed_sub_overflows(dst_reg->smax_value, smin_val)) { 4921 /* Overflow possible, we know nothing */ 4922 dst_reg->smin_value = S64_MIN; 4923 dst_reg->smax_value = S64_MAX; 4924 } else { 4925 dst_reg->smin_value -= smax_val; 4926 dst_reg->smax_value -= smin_val; 4927 } 4928 if (dst_reg->umin_value < umax_val) { 4929 /* Overflow possible, we know nothing */ 4930 dst_reg->umin_value = 0; 4931 dst_reg->umax_value = U64_MAX; 4932 } else { 4933 /* Cannot overflow (as long as bounds are consistent) */ 4934 dst_reg->umin_value -= umax_val; 4935 dst_reg->umax_value -= umin_val; 4936 } 4937 dst_reg->var_off = tnum_sub(dst_reg->var_off, src_reg.var_off); 4938 break; 4939 case BPF_MUL: 4940 dst_reg->var_off = tnum_mul(dst_reg->var_off, src_reg.var_off); 4941 if (smin_val < 0 || dst_reg->smin_value < 0) { 4942 /* Ain't nobody got time to multiply that sign */ 4943 __mark_reg_unbounded(dst_reg); 4944 __update_reg_bounds(dst_reg); 4945 break; 4946 } 4947 /* Both values are positive, so we can work with unsigned and 4948 * copy the result to signed (unless it exceeds S64_MAX). 4949 */ 4950 if (umax_val > U32_MAX || dst_reg->umax_value > U32_MAX) { 4951 /* Potential overflow, we know nothing */ 4952 __mark_reg_unbounded(dst_reg); 4953 /* (except what we can learn from the var_off) */ 4954 __update_reg_bounds(dst_reg); 4955 break; 4956 } 4957 dst_reg->umin_value *= umin_val; 4958 dst_reg->umax_value *= umax_val; 4959 if (dst_reg->umax_value > S64_MAX) { 4960 /* Overflow possible, we know nothing */ 4961 dst_reg->smin_value = S64_MIN; 4962 dst_reg->smax_value = S64_MAX; 4963 } else { 4964 dst_reg->smin_value = dst_reg->umin_value; 4965 dst_reg->smax_value = dst_reg->umax_value; 4966 } 4967 break; 4968 case BPF_AND: 4969 if (src_known && dst_known) { 4970 __mark_reg_known(dst_reg, dst_reg->var_off.value & 4971 src_reg.var_off.value); 4972 break; 4973 } 4974 /* We get our minimum from the var_off, since that's inherently 4975 * bitwise. Our maximum is the minimum of the operands' maxima. 
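 *
 * A worked sketch: dst in [0, 255] with var_off (0; 0xff) ANDed with the
 * constant 0xf0 yields var_off (0; 0xf0), so umin_value becomes 0 and
 * umax_value becomes min(255, 0xf0) == 0xf0.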
4976 */ 4977 dst_reg->var_off = tnum_and(dst_reg->var_off, src_reg.var_off); 4978 dst_reg->umin_value = dst_reg->var_off.value; 4979 dst_reg->umax_value = min(dst_reg->umax_value, umax_val); 4980 if (dst_reg->smin_value < 0 || smin_val < 0) { 4981 /* Lose signed bounds when ANDing negative numbers, 4982 * ain't nobody got time for that. 4983 */ 4984 dst_reg->smin_value = S64_MIN; 4985 dst_reg->smax_value = S64_MAX; 4986 } else { 4987 /* ANDing two positives gives a positive, so safe to 4988 * cast result into s64. 4989 */ 4990 dst_reg->smin_value = dst_reg->umin_value; 4991 dst_reg->smax_value = dst_reg->umax_value; 4992 } 4993 /* We may learn something more from the var_off */ 4994 __update_reg_bounds(dst_reg); 4995 break; 4996 case BPF_OR: 4997 if (src_known && dst_known) { 4998 __mark_reg_known(dst_reg, dst_reg->var_off.value | 4999 src_reg.var_off.value); 5000 break; 5001 } 5002 /* We get our maximum from the var_off, and our minimum is the 5003 * maximum of the operands' minima 5004 */ 5005 dst_reg->var_off = tnum_or(dst_reg->var_off, src_reg.var_off); 5006 dst_reg->umin_value = max(dst_reg->umin_value, umin_val); 5007 dst_reg->umax_value = dst_reg->var_off.value | 5008 dst_reg->var_off.mask; 5009 if (dst_reg->smin_value < 0 || smin_val < 0) { 5010 /* Lose signed bounds when ORing negative numbers, 5011 * ain't nobody got time for that. 5012 */ 5013 dst_reg->smin_value = S64_MIN; 5014 dst_reg->smax_value = S64_MAX; 5015 } else { 5016 /* ORing two positives gives a positive, so safe to 5017 * cast result into s64. 5018 */ 5019 dst_reg->smin_value = dst_reg->umin_value; 5020 dst_reg->smax_value = dst_reg->umax_value; 5021 } 5022 /* We may learn something more from the var_off */ 5023 __update_reg_bounds(dst_reg); 5024 break; 5025 case BPF_LSH: 5026 if (umax_val >= insn_bitness) { 5027 /* Shifts greater than 31 or 63 are undefined. 5028 * This includes shifts by a negative number. 5029 */ 5030 mark_reg_unknown(env, regs, insn->dst_reg); 5031 break; 5032 } 5033 /* We lose all sign bit information (except what we can pick 5034 * up from var_off) 5035 */ 5036 dst_reg->smin_value = S64_MIN; 5037 dst_reg->smax_value = S64_MAX; 5038 /* If we might shift our top bit out, then we know nothing */ 5039 if (dst_reg->umax_value > 1ULL << (63 - umax_val)) { 5040 dst_reg->umin_value = 0; 5041 dst_reg->umax_value = U64_MAX; 5042 } else { 5043 dst_reg->umin_value <<= umin_val; 5044 dst_reg->umax_value <<= umax_val; 5045 } 5046 dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val); 5047 /* We may learn something more from the var_off */ 5048 __update_reg_bounds(dst_reg); 5049 break; 5050 case BPF_RSH: 5051 if (umax_val >= insn_bitness) { 5052 /* Shifts greater than 31 or 63 are undefined. 5053 * This includes shifts by a negative number. 5054 */ 5055 mark_reg_unknown(env, regs, insn->dst_reg); 5056 break; 5057 } 5058 /* BPF_RSH is an unsigned shift. If the value in dst_reg might 5059 * be negative, then either: 5060 * 1) src_reg might be zero, so the sign bit of the result is 5061 * unknown, so we lose our signed bounds 5062 * 2) it's known negative, thus the unsigned bounds capture the 5063 * signed bounds 5064 * 3) the signed bounds cross zero, so they tell us nothing 5065 * about the result 5066 * If the value in dst_reg is known nonnegative, then again the 5067 * unsigned bounts capture the signed bounds. 5068 * Thus, in all cases it suffices to blow away our signed bounds 5069 * and rely on inferring new ones from the unsigned bounds and 5070 * var_off of the result. 
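 *
 * A small sketch: dst with unsigned bounds [16, 64] shifted right by a
 * known constant 2 ends up with umin_value 4 and umax_value 16; the
 * signed bounds are then re-derived from these and the var_off.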
5071 */ 5072 dst_reg->smin_value = S64_MIN; 5073 dst_reg->smax_value = S64_MAX; 5074 dst_reg->var_off = tnum_rshift(dst_reg->var_off, umin_val); 5075 dst_reg->umin_value >>= umax_val; 5076 dst_reg->umax_value >>= umin_val; 5077 /* We may learn something more from the var_off */ 5078 __update_reg_bounds(dst_reg); 5079 break; 5080 case BPF_ARSH: 5081 if (umax_val >= insn_bitness) { 5082 /* Shifts greater than 31 or 63 are undefined. 5083 * This includes shifts by a negative number. 5084 */ 5085 mark_reg_unknown(env, regs, insn->dst_reg); 5086 break; 5087 } 5088 5089 /* Upon reaching here, src_known is true and 5090 * umax_val is equal to umin_val. 5091 */ 5092 dst_reg->smin_value >>= umin_val; 5093 dst_reg->smax_value >>= umin_val; 5094 dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_val); 5095 5096 /* blow away the dst_reg umin_value/umax_value and rely on 5097 * dst_reg var_off to refine the result. 5098 */ 5099 dst_reg->umin_value = 0; 5100 dst_reg->umax_value = U64_MAX; 5101 __update_reg_bounds(dst_reg); 5102 break; 5103 default: 5104 mark_reg_unknown(env, regs, insn->dst_reg); 5105 break; 5106 } 5107 5108 if (BPF_CLASS(insn->code) != BPF_ALU64) { 5109 /* 32-bit ALU ops are (32,32)->32 */ 5110 coerce_reg_to_size(dst_reg, 4); 5111 } 5112 5113 __reg_deduce_bounds(dst_reg); 5114 __reg_bound_offset(dst_reg); 5115 return 0; 5116 } 5117 5118 /* Handles ALU ops other than BPF_END, BPF_NEG and BPF_MOV: computes new min/max 5119 * and var_off. 5120 */ 5121 static int adjust_reg_min_max_vals(struct bpf_verifier_env *env, 5122 struct bpf_insn *insn) 5123 { 5124 struct bpf_verifier_state *vstate = env->cur_state; 5125 struct bpf_func_state *state = vstate->frame[vstate->curframe]; 5126 struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg; 5127 struct bpf_reg_state *ptr_reg = NULL, off_reg = {0}; 5128 u8 opcode = BPF_OP(insn->code); 5129 int err; 5130 5131 dst_reg = ®s[insn->dst_reg]; 5132 src_reg = NULL; 5133 if (dst_reg->type != SCALAR_VALUE) 5134 ptr_reg = dst_reg; 5135 if (BPF_SRC(insn->code) == BPF_X) { 5136 src_reg = ®s[insn->src_reg]; 5137 if (src_reg->type != SCALAR_VALUE) { 5138 if (dst_reg->type != SCALAR_VALUE) { 5139 /* Combining two pointers by any ALU op yields 5140 * an arbitrary scalar. Disallow all math except 5141 * pointer subtraction 5142 */ 5143 if (opcode == BPF_SUB && env->allow_ptr_leaks) { 5144 mark_reg_unknown(env, regs, insn->dst_reg); 5145 return 0; 5146 } 5147 verbose(env, "R%d pointer %s pointer prohibited\n", 5148 insn->dst_reg, 5149 bpf_alu_string[opcode >> 4]); 5150 return -EACCES; 5151 } else { 5152 /* scalar += pointer 5153 * This is legal, but we have to reverse our 5154 * src/dest handling in computing the range 5155 */ 5156 err = mark_chain_precision(env, insn->dst_reg); 5157 if (err) 5158 return err; 5159 return adjust_ptr_min_max_vals(env, insn, 5160 src_reg, dst_reg); 5161 } 5162 } else if (ptr_reg) { 5163 /* pointer += scalar */ 5164 err = mark_chain_precision(env, insn->src_reg); 5165 if (err) 5166 return err; 5167 return adjust_ptr_min_max_vals(env, insn, 5168 dst_reg, src_reg); 5169 } 5170 } else { 5171 /* Pretend the src is a reg with a known value, since we only 5172 * need to be able to read from this state. 
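 * For instance, for BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8) the off_reg
 * below becomes a known scalar 8, and the same pointer/scalar
 * adjustment paths as in the register (BPF_X) case are taken.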
5173 */ 5174 off_reg.type = SCALAR_VALUE; 5175 __mark_reg_known(&off_reg, insn->imm); 5176 src_reg = &off_reg; 5177 if (ptr_reg) /* pointer += K */ 5178 return adjust_ptr_min_max_vals(env, insn, 5179 ptr_reg, src_reg); 5180 } 5181 5182 /* Got here implies adding two SCALAR_VALUEs */ 5183 if (WARN_ON_ONCE(ptr_reg)) { 5184 print_verifier_state(env, state); 5185 verbose(env, "verifier internal error: unexpected ptr_reg\n"); 5186 return -EINVAL; 5187 } 5188 if (WARN_ON(!src_reg)) { 5189 print_verifier_state(env, state); 5190 verbose(env, "verifier internal error: no src_reg\n"); 5191 return -EINVAL; 5192 } 5193 return adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg); 5194 } 5195 5196 /* check validity of 32-bit and 64-bit arithmetic operations */ 5197 static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) 5198 { 5199 struct bpf_reg_state *regs = cur_regs(env); 5200 u8 opcode = BPF_OP(insn->code); 5201 int err; 5202 5203 if (opcode == BPF_END || opcode == BPF_NEG) { 5204 if (opcode == BPF_NEG) { 5205 if (BPF_SRC(insn->code) != 0 || 5206 insn->src_reg != BPF_REG_0 || 5207 insn->off != 0 || insn->imm != 0) { 5208 verbose(env, "BPF_NEG uses reserved fields\n"); 5209 return -EINVAL; 5210 } 5211 } else { 5212 if (insn->src_reg != BPF_REG_0 || insn->off != 0 || 5213 (insn->imm != 16 && insn->imm != 32 && insn->imm != 64) || 5214 BPF_CLASS(insn->code) == BPF_ALU64) { 5215 verbose(env, "BPF_END uses reserved fields\n"); 5216 return -EINVAL; 5217 } 5218 } 5219 5220 /* check src operand */ 5221 err = check_reg_arg(env, insn->dst_reg, SRC_OP); 5222 if (err) 5223 return err; 5224 5225 if (is_pointer_value(env, insn->dst_reg)) { 5226 verbose(env, "R%d pointer arithmetic prohibited\n", 5227 insn->dst_reg); 5228 return -EACCES; 5229 } 5230 5231 /* check dest operand */ 5232 err = check_reg_arg(env, insn->dst_reg, DST_OP); 5233 if (err) 5234 return err; 5235 5236 } else if (opcode == BPF_MOV) { 5237 5238 if (BPF_SRC(insn->code) == BPF_X) { 5239 if (insn->imm != 0 || insn->off != 0) { 5240 verbose(env, "BPF_MOV uses reserved fields\n"); 5241 return -EINVAL; 5242 } 5243 5244 /* check src operand */ 5245 err = check_reg_arg(env, insn->src_reg, SRC_OP); 5246 if (err) 5247 return err; 5248 } else { 5249 if (insn->src_reg != BPF_REG_0 || insn->off != 0) { 5250 verbose(env, "BPF_MOV uses reserved fields\n"); 5251 return -EINVAL; 5252 } 5253 } 5254 5255 /* check dest operand, mark as required later */ 5256 err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK); 5257 if (err) 5258 return err; 5259 5260 if (BPF_SRC(insn->code) == BPF_X) { 5261 struct bpf_reg_state *src_reg = regs + insn->src_reg; 5262 struct bpf_reg_state *dst_reg = regs + insn->dst_reg; 5263 5264 if (BPF_CLASS(insn->code) == BPF_ALU64) { 5265 /* case: R1 = R2 5266 * copy register state to dest reg 5267 */ 5268 *dst_reg = *src_reg; 5269 dst_reg->live |= REG_LIVE_WRITTEN; 5270 dst_reg->subreg_def = DEF_NOT_SUBREG; 5271 } else { 5272 /* R1 = (u32) R2 */ 5273 if (is_pointer_value(env, insn->src_reg)) { 5274 verbose(env, 5275 "R%d partial copy of pointer\n", 5276 insn->src_reg); 5277 return -EACCES; 5278 } else if (src_reg->type == SCALAR_VALUE) { 5279 *dst_reg = *src_reg; 5280 dst_reg->live |= REG_LIVE_WRITTEN; 5281 dst_reg->subreg_def = env->insn_idx + 1; 5282 } else { 5283 mark_reg_unknown(env, regs, 5284 insn->dst_reg); 5285 } 5286 coerce_reg_to_size(dst_reg, 4); 5287 } 5288 } else { 5289 /* case: R = imm 5290 * remember the value we stored into this reg 5291 */ 5292 /* clear any state __mark_reg_known doesn't set */ 5293 
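/* e.g. (illustrative): BPF_MOV64_IMM(BPF_REG_2, -1) records r2 as the
 * known scalar 0xffffffffffffffff, while BPF_MOV32_IMM(BPF_REG_2, -1)
 * records only the truncated (u32)-1 == 0xffffffff.
 */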
mark_reg_unknown(env, regs, insn->dst_reg); 5294 regs[insn->dst_reg].type = SCALAR_VALUE; 5295 if (BPF_CLASS(insn->code) == BPF_ALU64) { 5296 __mark_reg_known(regs + insn->dst_reg, 5297 insn->imm); 5298 } else { 5299 __mark_reg_known(regs + insn->dst_reg, 5300 (u32)insn->imm); 5301 } 5302 } 5303 5304 } else if (opcode > BPF_END) { 5305 verbose(env, "invalid BPF_ALU opcode %x\n", opcode); 5306 return -EINVAL; 5307 5308 } else { /* all other ALU ops: and, sub, xor, add, ... */ 5309 5310 if (BPF_SRC(insn->code) == BPF_X) { 5311 if (insn->imm != 0 || insn->off != 0) { 5312 verbose(env, "BPF_ALU uses reserved fields\n"); 5313 return -EINVAL; 5314 } 5315 /* check src1 operand */ 5316 err = check_reg_arg(env, insn->src_reg, SRC_OP); 5317 if (err) 5318 return err; 5319 } else { 5320 if (insn->src_reg != BPF_REG_0 || insn->off != 0) { 5321 verbose(env, "BPF_ALU uses reserved fields\n"); 5322 return -EINVAL; 5323 } 5324 } 5325 5326 /* check src2 operand */ 5327 err = check_reg_arg(env, insn->dst_reg, SRC_OP); 5328 if (err) 5329 return err; 5330 5331 if ((opcode == BPF_MOD || opcode == BPF_DIV) && 5332 BPF_SRC(insn->code) == BPF_K && insn->imm == 0) { 5333 verbose(env, "div by zero\n"); 5334 return -EINVAL; 5335 } 5336 5337 if ((opcode == BPF_LSH || opcode == BPF_RSH || 5338 opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) { 5339 int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32; 5340 5341 if (insn->imm < 0 || insn->imm >= size) { 5342 verbose(env, "invalid shift %d\n", insn->imm); 5343 return -EINVAL; 5344 } 5345 } 5346 5347 /* check dest operand */ 5348 err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK); 5349 if (err) 5350 return err; 5351 5352 return adjust_reg_min_max_vals(env, insn); 5353 } 5354 5355 return 0; 5356 } 5357 5358 static void __find_good_pkt_pointers(struct bpf_func_state *state, 5359 struct bpf_reg_state *dst_reg, 5360 enum bpf_reg_type type, u16 new_range) 5361 { 5362 struct bpf_reg_state *reg; 5363 int i; 5364 5365 for (i = 0; i < MAX_BPF_REG; i++) { 5366 reg = &state->regs[i]; 5367 if (reg->type == type && reg->id == dst_reg->id) 5368 /* keep the maximum range already checked */ 5369 reg->range = max(reg->range, new_range); 5370 } 5371 5372 bpf_for_each_spilled_reg(i, state, reg) { 5373 if (!reg) 5374 continue; 5375 if (reg->type == type && reg->id == dst_reg->id) 5376 reg->range = max(reg->range, new_range); 5377 } 5378 } 5379 5380 static void find_good_pkt_pointers(struct bpf_verifier_state *vstate, 5381 struct bpf_reg_state *dst_reg, 5382 enum bpf_reg_type type, 5383 bool range_right_open) 5384 { 5385 u16 new_range; 5386 int i; 5387 5388 if (dst_reg->off < 0 || 5389 (dst_reg->off == 0 && range_right_open)) 5390 /* This doesn't give us any range */ 5391 return; 5392 5393 if (dst_reg->umax_value > MAX_PACKET_OFF || 5394 dst_reg->umax_value + dst_reg->off > MAX_PACKET_OFF) 5395 /* Risk of overflow. For instance, ptr + (1<<63) may be less 5396 * than pkt_end, but that's because it's also less than pkt. 
5397 */ 5398 return; 5399 5400 new_range = dst_reg->off; 5401 if (range_right_open) 5402 new_range--; 5403 5404 /* Examples for register markings: 5405 * 5406 * pkt_data in dst register: 5407 * 5408 * r2 = r3; 5409 * r2 += 8; 5410 * if (r2 > pkt_end) goto <handle exception> 5411 * <access okay> 5412 * 5413 * r2 = r3; 5414 * r2 += 8; 5415 * if (r2 < pkt_end) goto <access okay> 5416 * <handle exception> 5417 * 5418 * Where: 5419 * r2 == dst_reg, pkt_end == src_reg 5420 * r2=pkt(id=n,off=8,r=0) 5421 * r3=pkt(id=n,off=0,r=0) 5422 * 5423 * pkt_data in src register: 5424 * 5425 * r2 = r3; 5426 * r2 += 8; 5427 * if (pkt_end >= r2) goto <access okay> 5428 * <handle exception> 5429 * 5430 * r2 = r3; 5431 * r2 += 8; 5432 * if (pkt_end <= r2) goto <handle exception> 5433 * <access okay> 5434 * 5435 * Where: 5436 * pkt_end == dst_reg, r2 == src_reg 5437 * r2=pkt(id=n,off=8,r=0) 5438 * r3=pkt(id=n,off=0,r=0) 5439 * 5440 * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8) 5441 * or r3=pkt(id=n,off=0,r=8-1), so that range of bytes [r3, r3 + 8) 5442 * and [r3, r3 + 8-1) respectively is safe to access depending on 5443 * the check. 5444 */ 5445 5446 /* If our ids match, then we must have the same max_value. And we 5447 * don't care about the other reg's fixed offset, since if it's too big 5448 * the range won't allow anything. 5449 * dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16. 5450 */ 5451 for (i = 0; i <= vstate->curframe; i++) 5452 __find_good_pkt_pointers(vstate->frame[i], dst_reg, type, 5453 new_range); 5454 } 5455 5456 /* compute branch direction of the expression "if (reg opcode val) goto target;" 5457 * and return: 5458 * 1 - branch will be taken and "goto target" will be executed 5459 * 0 - branch will not be taken and fall-through to next insn 5460 * -1 - unknown. Example: "if (reg < 5)" is unknown when register value range [0,10] 5461 */ 5462 static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode, 5463 bool is_jmp32) 5464 { 5465 struct bpf_reg_state reg_lo; 5466 s64 sval; 5467 5468 if (__is_pointer_value(false, reg)) 5469 return -1; 5470 5471 if (is_jmp32) { 5472 reg_lo = *reg; 5473 reg = ®_lo; 5474 /* For JMP32, only low 32 bits are compared, coerce_reg_to_size 5475 * could truncate high bits and update umin/umax according to 5476 * information of low bits. 5477 */ 5478 coerce_reg_to_size(reg, 4); 5479 /* smin/smax need special handling. For example, after coerce, 5480 * if smin_value is 0x00000000ffffffffLL, the value is -1 when 5481 * used as operand to JMP32. It is a negative number from s32's 5482 * point of view, while it is a positive number when seen as 5483 * s64. The smin/smax are kept as s64, therefore, when used with 5484 * JMP32, they need to be transformed into s32, then sign 5485 * extended back to s64. 5486 * 5487 * Also, smin/smax were copied from umin/umax. If umin/umax has 5488 * different sign bit, then min/max relationship doesn't 5489 * maintain after casting into s32, for this case, set smin/smax 5490 * to safest range. 
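 *
 * A sketch: if, after coercion, umin_value is 0x7fffffff and umax_value
 * is 0x80000001, they differ in bit 31, so smin/smax are widened to
 * [S32_MIN, S32_MAX] below before being sign extended back to s64.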
5491 */ 5492 if ((reg->umax_value ^ reg->umin_value) & 5493 (1ULL << 31)) { 5494 reg->smin_value = S32_MIN; 5495 reg->smax_value = S32_MAX; 5496 } 5497 reg->smin_value = (s64)(s32)reg->smin_value; 5498 reg->smax_value = (s64)(s32)reg->smax_value; 5499 5500 val = (u32)val; 5501 sval = (s64)(s32)val; 5502 } else { 5503 sval = (s64)val; 5504 } 5505 5506 switch (opcode) { 5507 case BPF_JEQ: 5508 if (tnum_is_const(reg->var_off)) 5509 return !!tnum_equals_const(reg->var_off, val); 5510 break; 5511 case BPF_JNE: 5512 if (tnum_is_const(reg->var_off)) 5513 return !tnum_equals_const(reg->var_off, val); 5514 break; 5515 case BPF_JSET: 5516 if ((~reg->var_off.mask & reg->var_off.value) & val) 5517 return 1; 5518 if (!((reg->var_off.mask | reg->var_off.value) & val)) 5519 return 0; 5520 break; 5521 case BPF_JGT: 5522 if (reg->umin_value > val) 5523 return 1; 5524 else if (reg->umax_value <= val) 5525 return 0; 5526 break; 5527 case BPF_JSGT: 5528 if (reg->smin_value > sval) 5529 return 1; 5530 else if (reg->smax_value < sval) 5531 return 0; 5532 break; 5533 case BPF_JLT: 5534 if (reg->umax_value < val) 5535 return 1; 5536 else if (reg->umin_value >= val) 5537 return 0; 5538 break; 5539 case BPF_JSLT: 5540 if (reg->smax_value < sval) 5541 return 1; 5542 else if (reg->smin_value >= sval) 5543 return 0; 5544 break; 5545 case BPF_JGE: 5546 if (reg->umin_value >= val) 5547 return 1; 5548 else if (reg->umax_value < val) 5549 return 0; 5550 break; 5551 case BPF_JSGE: 5552 if (reg->smin_value >= sval) 5553 return 1; 5554 else if (reg->smax_value < sval) 5555 return 0; 5556 break; 5557 case BPF_JLE: 5558 if (reg->umax_value <= val) 5559 return 1; 5560 else if (reg->umin_value > val) 5561 return 0; 5562 break; 5563 case BPF_JSLE: 5564 if (reg->smax_value <= sval) 5565 return 1; 5566 else if (reg->smin_value > sval) 5567 return 0; 5568 break; 5569 } 5570 5571 return -1; 5572 } 5573 5574 /* Generate min value of the high 32-bit from TNUM info. */ 5575 static u64 gen_hi_min(struct tnum var) 5576 { 5577 return var.value & ~0xffffffffULL; 5578 } 5579 5580 /* Generate max value of the high 32-bit from TNUM info. */ 5581 static u64 gen_hi_max(struct tnum var) 5582 { 5583 return (var.value | var.mask) & ~0xffffffffULL; 5584 } 5585 5586 /* Return true if VAL is compared with a s64 sign extended from s32, and they 5587 * are with the same signedness. 5588 */ 5589 static bool cmp_val_with_extended_s64(s64 sval, struct bpf_reg_state *reg) 5590 { 5591 return ((s32)sval >= 0 && 5592 reg->smin_value >= 0 && reg->smax_value <= S32_MAX) || 5593 ((s32)sval < 0 && 5594 reg->smax_value <= 0 && reg->smin_value >= S32_MIN); 5595 } 5596 5597 /* Adjusts the register min/max values in the case that the dst_reg is the 5598 * variable register that we are working on, and src_reg is a constant or we're 5599 * simply doing a BPF_K check. 5600 * In JEQ/JNE cases we also adjust the var_off values. 5601 */ 5602 static void reg_set_min_max(struct bpf_reg_state *true_reg, 5603 struct bpf_reg_state *false_reg, u64 val, 5604 u8 opcode, bool is_jmp32) 5605 { 5606 s64 sval; 5607 5608 /* If the dst_reg is a pointer, we can't learn anything about its 5609 * variable offset from the compare (unless src_reg were a pointer into 5610 * the same object, but we don't bother with that. 5611 * Since false_reg and true_reg have the same type by construction, we 5612 * only need to check one of them for pointerness. 5613 */ 5614 if (__is_pointer_value(false, false_reg)) 5615 return; 5616 5617 val = is_jmp32 ? (u32)val : val; 5618 sval = is_jmp32 ? 
(s64)(s32)val : (s64)val; 5619 5620 switch (opcode) { 5621 case BPF_JEQ: 5622 case BPF_JNE: 5623 { 5624 struct bpf_reg_state *reg = 5625 opcode == BPF_JEQ ? true_reg : false_reg; 5626 5627 /* For BPF_JEQ, if this is false we know nothing Jon Snow, but 5628 * if it is true we know the value for sure. Likewise for 5629 * BPF_JNE. 5630 */ 5631 if (is_jmp32) { 5632 u64 old_v = reg->var_off.value; 5633 u64 hi_mask = ~0xffffffffULL; 5634 5635 reg->var_off.value = (old_v & hi_mask) | val; 5636 reg->var_off.mask &= hi_mask; 5637 } else { 5638 __mark_reg_known(reg, val); 5639 } 5640 break; 5641 } 5642 case BPF_JSET: 5643 false_reg->var_off = tnum_and(false_reg->var_off, 5644 tnum_const(~val)); 5645 if (is_power_of_2(val)) 5646 true_reg->var_off = tnum_or(true_reg->var_off, 5647 tnum_const(val)); 5648 break; 5649 case BPF_JGE: 5650 case BPF_JGT: 5651 { 5652 u64 false_umax = opcode == BPF_JGT ? val : val - 1; 5653 u64 true_umin = opcode == BPF_JGT ? val + 1 : val; 5654 5655 if (is_jmp32) { 5656 false_umax += gen_hi_max(false_reg->var_off); 5657 true_umin += gen_hi_min(true_reg->var_off); 5658 } 5659 false_reg->umax_value = min(false_reg->umax_value, false_umax); 5660 true_reg->umin_value = max(true_reg->umin_value, true_umin); 5661 break; 5662 } 5663 case BPF_JSGE: 5664 case BPF_JSGT: 5665 { 5666 s64 false_smax = opcode == BPF_JSGT ? sval : sval - 1; 5667 s64 true_smin = opcode == BPF_JSGT ? sval + 1 : sval; 5668 5669 /* If the full s64 was not sign-extended from s32 then don't 5670 * deduct further info. 5671 */ 5672 if (is_jmp32 && !cmp_val_with_extended_s64(sval, false_reg)) 5673 break; 5674 false_reg->smax_value = min(false_reg->smax_value, false_smax); 5675 true_reg->smin_value = max(true_reg->smin_value, true_smin); 5676 break; 5677 } 5678 case BPF_JLE: 5679 case BPF_JLT: 5680 { 5681 u64 false_umin = opcode == BPF_JLT ? val : val + 1; 5682 u64 true_umax = opcode == BPF_JLT ? val - 1 : val; 5683 5684 if (is_jmp32) { 5685 false_umin += gen_hi_min(false_reg->var_off); 5686 true_umax += gen_hi_max(true_reg->var_off); 5687 } 5688 false_reg->umin_value = max(false_reg->umin_value, false_umin); 5689 true_reg->umax_value = min(true_reg->umax_value, true_umax); 5690 break; 5691 } 5692 case BPF_JSLE: 5693 case BPF_JSLT: 5694 { 5695 s64 false_smin = opcode == BPF_JSLT ? sval : sval + 1; 5696 s64 true_smax = opcode == BPF_JSLT ? sval - 1 : sval; 5697 5698 if (is_jmp32 && !cmp_val_with_extended_s64(sval, false_reg)) 5699 break; 5700 false_reg->smin_value = max(false_reg->smin_value, false_smin); 5701 true_reg->smax_value = min(true_reg->smax_value, true_smax); 5702 break; 5703 } 5704 default: 5705 break; 5706 } 5707 5708 __reg_deduce_bounds(false_reg); 5709 __reg_deduce_bounds(true_reg); 5710 /* We might have learned some bits from the bounds. */ 5711 __reg_bound_offset(false_reg); 5712 __reg_bound_offset(true_reg); 5713 if (is_jmp32) { 5714 __reg_bound_offset32(false_reg); 5715 __reg_bound_offset32(true_reg); 5716 } 5717 /* Intersecting with the old var_off might have improved our bounds 5718 * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc), 5719 * then new var_off is (0; 0x7f...fc) which improves our umax. 5720 */ 5721 __update_reg_bounds(false_reg); 5722 __update_reg_bounds(true_reg); 5723 } 5724 5725 /* Same as above, but for the case that dst_reg holds a constant and src_reg is 5726 * the variable reg. 
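 * E.g. a dst known to be 10 compared with "if (dst > src)" tells us the
 * same thing as "if (src < 10)", so each opcode case below applies the
 * bounds reg_set_min_max() applies for the reversed comparison: BPF_JGT
 * here matches its BPF_JLT, BPF_JSGE matches its BPF_JSLE, and so on.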
5727 */ 5728 static void reg_set_min_max_inv(struct bpf_reg_state *true_reg, 5729 struct bpf_reg_state *false_reg, u64 val, 5730 u8 opcode, bool is_jmp32) 5731 { 5732 s64 sval; 5733 5734 if (__is_pointer_value(false, false_reg)) 5735 return; 5736 5737 val = is_jmp32 ? (u32)val : val; 5738 sval = is_jmp32 ? (s64)(s32)val : (s64)val; 5739 5740 switch (opcode) { 5741 case BPF_JEQ: 5742 case BPF_JNE: 5743 { 5744 struct bpf_reg_state *reg = 5745 opcode == BPF_JEQ ? true_reg : false_reg; 5746 5747 if (is_jmp32) { 5748 u64 old_v = reg->var_off.value; 5749 u64 hi_mask = ~0xffffffffULL; 5750 5751 reg->var_off.value = (old_v & hi_mask) | val; 5752 reg->var_off.mask &= hi_mask; 5753 } else { 5754 __mark_reg_known(reg, val); 5755 } 5756 break; 5757 } 5758 case BPF_JSET: 5759 false_reg->var_off = tnum_and(false_reg->var_off, 5760 tnum_const(~val)); 5761 if (is_power_of_2(val)) 5762 true_reg->var_off = tnum_or(true_reg->var_off, 5763 tnum_const(val)); 5764 break; 5765 case BPF_JGE: 5766 case BPF_JGT: 5767 { 5768 u64 false_umin = opcode == BPF_JGT ? val : val + 1; 5769 u64 true_umax = opcode == BPF_JGT ? val - 1 : val; 5770 5771 if (is_jmp32) { 5772 false_umin += gen_hi_min(false_reg->var_off); 5773 true_umax += gen_hi_max(true_reg->var_off); 5774 } 5775 false_reg->umin_value = max(false_reg->umin_value, false_umin); 5776 true_reg->umax_value = min(true_reg->umax_value, true_umax); 5777 break; 5778 } 5779 case BPF_JSGE: 5780 case BPF_JSGT: 5781 { 5782 s64 false_smin = opcode == BPF_JSGT ? sval : sval + 1; 5783 s64 true_smax = opcode == BPF_JSGT ? sval - 1 : sval; 5784 5785 if (is_jmp32 && !cmp_val_with_extended_s64(sval, false_reg)) 5786 break; 5787 false_reg->smin_value = max(false_reg->smin_value, false_smin); 5788 true_reg->smax_value = min(true_reg->smax_value, true_smax); 5789 break; 5790 } 5791 case BPF_JLE: 5792 case BPF_JLT: 5793 { 5794 u64 false_umax = opcode == BPF_JLT ? val : val - 1; 5795 u64 true_umin = opcode == BPF_JLT ? val + 1 : val; 5796 5797 if (is_jmp32) { 5798 false_umax += gen_hi_max(false_reg->var_off); 5799 true_umin += gen_hi_min(true_reg->var_off); 5800 } 5801 false_reg->umax_value = min(false_reg->umax_value, false_umax); 5802 true_reg->umin_value = max(true_reg->umin_value, true_umin); 5803 break; 5804 } 5805 case BPF_JSLE: 5806 case BPF_JSLT: 5807 { 5808 s64 false_smax = opcode == BPF_JSLT ? sval : sval - 1; 5809 s64 true_smin = opcode == BPF_JSLT ? sval + 1 : sval; 5810 5811 if (is_jmp32 && !cmp_val_with_extended_s64(sval, false_reg)) 5812 break; 5813 false_reg->smax_value = min(false_reg->smax_value, false_smax); 5814 true_reg->smin_value = max(true_reg->smin_value, true_smin); 5815 break; 5816 } 5817 default: 5818 break; 5819 } 5820 5821 __reg_deduce_bounds(false_reg); 5822 __reg_deduce_bounds(true_reg); 5823 /* We might have learned some bits from the bounds. */ 5824 __reg_bound_offset(false_reg); 5825 __reg_bound_offset(true_reg); 5826 if (is_jmp32) { 5827 __reg_bound_offset32(false_reg); 5828 __reg_bound_offset32(true_reg); 5829 } 5830 /* Intersecting with the old var_off might have improved our bounds 5831 * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc), 5832 * then new var_off is (0; 0x7f...fc) which improves our umax. 
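 * That is also why __update_reg_bounds() is called below, after
 * __reg_bound_offset(): the tightened var_off can feed back into the
 * umin/umax and smin/smax bounds.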
5833 */ 5834 __update_reg_bounds(false_reg); 5835 __update_reg_bounds(true_reg); 5836 } 5837 5838 /* Regs are known to be equal, so intersect their min/max/var_off */ 5839 static void __reg_combine_min_max(struct bpf_reg_state *src_reg, 5840 struct bpf_reg_state *dst_reg) 5841 { 5842 src_reg->umin_value = dst_reg->umin_value = max(src_reg->umin_value, 5843 dst_reg->umin_value); 5844 src_reg->umax_value = dst_reg->umax_value = min(src_reg->umax_value, 5845 dst_reg->umax_value); 5846 src_reg->smin_value = dst_reg->smin_value = max(src_reg->smin_value, 5847 dst_reg->smin_value); 5848 src_reg->smax_value = dst_reg->smax_value = min(src_reg->smax_value, 5849 dst_reg->smax_value); 5850 src_reg->var_off = dst_reg->var_off = tnum_intersect(src_reg->var_off, 5851 dst_reg->var_off); 5852 /* We might have learned new bounds from the var_off. */ 5853 __update_reg_bounds(src_reg); 5854 __update_reg_bounds(dst_reg); 5855 /* We might have learned something about the sign bit. */ 5856 __reg_deduce_bounds(src_reg); 5857 __reg_deduce_bounds(dst_reg); 5858 /* We might have learned some bits from the bounds. */ 5859 __reg_bound_offset(src_reg); 5860 __reg_bound_offset(dst_reg); 5861 /* Intersecting with the old var_off might have improved our bounds 5862 * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc), 5863 * then new var_off is (0; 0x7f...fc) which improves our umax. 5864 */ 5865 __update_reg_bounds(src_reg); 5866 __update_reg_bounds(dst_reg); 5867 } 5868 5869 static void reg_combine_min_max(struct bpf_reg_state *true_src, 5870 struct bpf_reg_state *true_dst, 5871 struct bpf_reg_state *false_src, 5872 struct bpf_reg_state *false_dst, 5873 u8 opcode) 5874 { 5875 switch (opcode) { 5876 case BPF_JEQ: 5877 __reg_combine_min_max(true_src, true_dst); 5878 break; 5879 case BPF_JNE: 5880 __reg_combine_min_max(false_src, false_dst); 5881 break; 5882 } 5883 } 5884 5885 static void mark_ptr_or_null_reg(struct bpf_func_state *state, 5886 struct bpf_reg_state *reg, u32 id, 5887 bool is_null) 5888 { 5889 if (reg_type_may_be_null(reg->type) && reg->id == id) { 5890 /* Old offset (both fixed and variable parts) should 5891 * have been known-zero, because we don't allow pointer 5892 * arithmetic on pointers that might be NULL. 5893 */ 5894 if (WARN_ON_ONCE(reg->smin_value || reg->smax_value || 5895 !tnum_equals_const(reg->var_off, 0) || 5896 reg->off)) { 5897 __mark_reg_known_zero(reg); 5898 reg->off = 0; 5899 } 5900 if (is_null) { 5901 reg->type = SCALAR_VALUE; 5902 } else if (reg->type == PTR_TO_MAP_VALUE_OR_NULL) { 5903 if (reg->map_ptr->inner_map_meta) { 5904 reg->type = CONST_PTR_TO_MAP; 5905 reg->map_ptr = reg->map_ptr->inner_map_meta; 5906 } else if (reg->map_ptr->map_type == 5907 BPF_MAP_TYPE_XSKMAP) { 5908 reg->type = PTR_TO_XDP_SOCK; 5909 } else { 5910 reg->type = PTR_TO_MAP_VALUE; 5911 } 5912 } else if (reg->type == PTR_TO_SOCKET_OR_NULL) { 5913 reg->type = PTR_TO_SOCKET; 5914 } else if (reg->type == PTR_TO_SOCK_COMMON_OR_NULL) { 5915 reg->type = PTR_TO_SOCK_COMMON; 5916 } else if (reg->type == PTR_TO_TCP_SOCK_OR_NULL) { 5917 reg->type = PTR_TO_TCP_SOCK; 5918 } 5919 if (is_null) { 5920 /* We don't need id and ref_obj_id from this point 5921 * onwards anymore, thus we should better reset it, 5922 * so that state pruning has chances to take effect. 5923 */ 5924 reg->id = 0; 5925 reg->ref_obj_id = 0; 5926 } else if (!reg_may_point_to_spin_lock(reg)) { 5927 /* For not-NULL ptr, reg->ref_obj_id will be reset 5928 * in release_reg_references(). 5929 * 5930 * reg->id is still used by spin_lock ptr. 
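 * (The verifier keeps the id of the currently locked map value in
 * env->cur_state->active_spin_lock, so that a later bpf_spin_unlock()
 * can be checked against the same lock.)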
Other 5931 * than spin_lock ptr type, reg->id can be reset. 5932 */ 5933 reg->id = 0; 5934 } 5935 } 5936 } 5937 5938 static void __mark_ptr_or_null_regs(struct bpf_func_state *state, u32 id, 5939 bool is_null) 5940 { 5941 struct bpf_reg_state *reg; 5942 int i; 5943 5944 for (i = 0; i < MAX_BPF_REG; i++) 5945 mark_ptr_or_null_reg(state, &state->regs[i], id, is_null); 5946 5947 bpf_for_each_spilled_reg(i, state, reg) { 5948 if (!reg) 5949 continue; 5950 mark_ptr_or_null_reg(state, reg, id, is_null); 5951 } 5952 } 5953 5954 /* The logic is similar to find_good_pkt_pointers(), both could eventually 5955 * be folded together at some point. 5956 */ 5957 static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno, 5958 bool is_null) 5959 { 5960 struct bpf_func_state *state = vstate->frame[vstate->curframe]; 5961 struct bpf_reg_state *regs = state->regs; 5962 u32 ref_obj_id = regs[regno].ref_obj_id; 5963 u32 id = regs[regno].id; 5964 int i; 5965 5966 if (ref_obj_id && ref_obj_id == id && is_null) 5967 /* regs[regno] is in the " == NULL" branch. 5968 * No one could have freed the reference state before 5969 * doing the NULL check. 5970 */ 5971 WARN_ON_ONCE(release_reference_state(state, id)); 5972 5973 for (i = 0; i <= vstate->curframe; i++) 5974 __mark_ptr_or_null_regs(vstate->frame[i], id, is_null); 5975 } 5976 5977 static bool try_match_pkt_pointers(const struct bpf_insn *insn, 5978 struct bpf_reg_state *dst_reg, 5979 struct bpf_reg_state *src_reg, 5980 struct bpf_verifier_state *this_branch, 5981 struct bpf_verifier_state *other_branch) 5982 { 5983 if (BPF_SRC(insn->code) != BPF_X) 5984 return false; 5985 5986 /* Pointers are always 64-bit. */ 5987 if (BPF_CLASS(insn->code) == BPF_JMP32) 5988 return false; 5989 5990 switch (BPF_OP(insn->code)) { 5991 case BPF_JGT: 5992 if ((dst_reg->type == PTR_TO_PACKET && 5993 src_reg->type == PTR_TO_PACKET_END) || 5994 (dst_reg->type == PTR_TO_PACKET_META && 5995 reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) { 5996 /* pkt_data' > pkt_end, pkt_meta' > pkt_data */ 5997 find_good_pkt_pointers(this_branch, dst_reg, 5998 dst_reg->type, false); 5999 } else if ((dst_reg->type == PTR_TO_PACKET_END && 6000 src_reg->type == PTR_TO_PACKET) || 6001 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) && 6002 src_reg->type == PTR_TO_PACKET_META)) { 6003 /* pkt_end > pkt_data', pkt_data > pkt_meta' */ 6004 find_good_pkt_pointers(other_branch, src_reg, 6005 src_reg->type, true); 6006 } else { 6007 return false; 6008 } 6009 break; 6010 case BPF_JLT: 6011 if ((dst_reg->type == PTR_TO_PACKET && 6012 src_reg->type == PTR_TO_PACKET_END) || 6013 (dst_reg->type == PTR_TO_PACKET_META && 6014 reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) { 6015 /* pkt_data' < pkt_end, pkt_meta' < pkt_data */ 6016 find_good_pkt_pointers(other_branch, dst_reg, 6017 dst_reg->type, true); 6018 } else if ((dst_reg->type == PTR_TO_PACKET_END && 6019 src_reg->type == PTR_TO_PACKET) || 6020 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) && 6021 src_reg->type == PTR_TO_PACKET_META)) { 6022 /* pkt_end < pkt_data', pkt_data > pkt_meta' */ 6023 find_good_pkt_pointers(this_branch, src_reg, 6024 src_reg->type, false); 6025 } else { 6026 return false; 6027 } 6028 break; 6029 case BPF_JGE: 6030 if ((dst_reg->type == PTR_TO_PACKET && 6031 src_reg->type == PTR_TO_PACKET_END) || 6032 (dst_reg->type == PTR_TO_PACKET_META && 6033 reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) { 6034 /* pkt_data' >= pkt_end, pkt_meta' >= pkt_data */ 6035 find_good_pkt_pointers(this_branch, dst_reg, 6036 
dst_reg->type, true); 6037 } else if ((dst_reg->type == PTR_TO_PACKET_END && 6038 src_reg->type == PTR_TO_PACKET) || 6039 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) && 6040 src_reg->type == PTR_TO_PACKET_META)) { 6041 /* pkt_end >= pkt_data', pkt_data >= pkt_meta' */ 6042 find_good_pkt_pointers(other_branch, src_reg, 6043 src_reg->type, false); 6044 } else { 6045 return false; 6046 } 6047 break; 6048 case BPF_JLE: 6049 if ((dst_reg->type == PTR_TO_PACKET && 6050 src_reg->type == PTR_TO_PACKET_END) || 6051 (dst_reg->type == PTR_TO_PACKET_META && 6052 reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) { 6053 /* pkt_data' <= pkt_end, pkt_meta' <= pkt_data */ 6054 find_good_pkt_pointers(other_branch, dst_reg, 6055 dst_reg->type, false); 6056 } else if ((dst_reg->type == PTR_TO_PACKET_END && 6057 src_reg->type == PTR_TO_PACKET) || 6058 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) && 6059 src_reg->type == PTR_TO_PACKET_META)) { 6060 /* pkt_end <= pkt_data', pkt_data <= pkt_meta' */ 6061 find_good_pkt_pointers(this_branch, src_reg, 6062 src_reg->type, true); 6063 } else { 6064 return false; 6065 } 6066 break; 6067 default: 6068 return false; 6069 } 6070 6071 return true; 6072 } 6073 6074 static int check_cond_jmp_op(struct bpf_verifier_env *env, 6075 struct bpf_insn *insn, int *insn_idx) 6076 { 6077 struct bpf_verifier_state *this_branch = env->cur_state; 6078 struct bpf_verifier_state *other_branch; 6079 struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs; 6080 struct bpf_reg_state *dst_reg, *other_branch_regs, *src_reg = NULL; 6081 u8 opcode = BPF_OP(insn->code); 6082 bool is_jmp32; 6083 int pred = -1; 6084 int err; 6085 6086 /* Only conditional jumps are expected to reach here. */ 6087 if (opcode == BPF_JA || opcode > BPF_JSLE) { 6088 verbose(env, "invalid BPF_JMP/JMP32 opcode %x\n", opcode); 6089 return -EINVAL; 6090 } 6091 6092 if (BPF_SRC(insn->code) == BPF_X) { 6093 if (insn->imm != 0) { 6094 verbose(env, "BPF_JMP/JMP32 uses reserved fields\n"); 6095 return -EINVAL; 6096 } 6097 6098 /* check src1 operand */ 6099 err = check_reg_arg(env, insn->src_reg, SRC_OP); 6100 if (err) 6101 return err; 6102 6103 if (is_pointer_value(env, insn->src_reg)) { 6104 verbose(env, "R%d pointer comparison prohibited\n", 6105 insn->src_reg); 6106 return -EACCES; 6107 } 6108 src_reg = ®s[insn->src_reg]; 6109 } else { 6110 if (insn->src_reg != BPF_REG_0) { 6111 verbose(env, "BPF_JMP/JMP32 uses reserved fields\n"); 6112 return -EINVAL; 6113 } 6114 } 6115 6116 /* check src2 operand */ 6117 err = check_reg_arg(env, insn->dst_reg, SRC_OP); 6118 if (err) 6119 return err; 6120 6121 dst_reg = ®s[insn->dst_reg]; 6122 is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32; 6123 6124 if (BPF_SRC(insn->code) == BPF_K) 6125 pred = is_branch_taken(dst_reg, insn->imm, 6126 opcode, is_jmp32); 6127 else if (src_reg->type == SCALAR_VALUE && 6128 tnum_is_const(src_reg->var_off)) 6129 pred = is_branch_taken(dst_reg, src_reg->var_off.value, 6130 opcode, is_jmp32); 6131 if (pred >= 0) { 6132 err = mark_chain_precision(env, insn->dst_reg); 6133 if (BPF_SRC(insn->code) == BPF_X && !err) 6134 err = mark_chain_precision(env, insn->src_reg); 6135 if (err) 6136 return err; 6137 } 6138 if (pred == 1) { 6139 /* only follow the goto, ignore fall-through */ 6140 *insn_idx += insn->off; 6141 return 0; 6142 } else if (pred == 0) { 6143 /* only follow fall-through branch, since 6144 * that's where the program will go 6145 */ 6146 return 0; 6147 } 6148 6149 other_branch = push_stack(env, *insn_idx + insn->off + 1, 
*insn_idx, 6150 false); 6151 if (!other_branch) 6152 return -EFAULT; 6153 other_branch_regs = other_branch->frame[other_branch->curframe]->regs; 6154 6155 /* detect if we are comparing against a constant value so we can adjust 6156 * our min/max values for our dst register. 6157 * this is only legit if both are scalars (or pointers to the same 6158 * object, I suppose, but we don't support that right now), because 6159 * otherwise the different base pointers mean the offsets aren't 6160 * comparable. 6161 */ 6162 if (BPF_SRC(insn->code) == BPF_X) { 6163 struct bpf_reg_state *src_reg = ®s[insn->src_reg]; 6164 struct bpf_reg_state lo_reg0 = *dst_reg; 6165 struct bpf_reg_state lo_reg1 = *src_reg; 6166 struct bpf_reg_state *src_lo, *dst_lo; 6167 6168 dst_lo = &lo_reg0; 6169 src_lo = &lo_reg1; 6170 coerce_reg_to_size(dst_lo, 4); 6171 coerce_reg_to_size(src_lo, 4); 6172 6173 if (dst_reg->type == SCALAR_VALUE && 6174 src_reg->type == SCALAR_VALUE) { 6175 if (tnum_is_const(src_reg->var_off) || 6176 (is_jmp32 && tnum_is_const(src_lo->var_off))) 6177 reg_set_min_max(&other_branch_regs[insn->dst_reg], 6178 dst_reg, 6179 is_jmp32 6180 ? src_lo->var_off.value 6181 : src_reg->var_off.value, 6182 opcode, is_jmp32); 6183 else if (tnum_is_const(dst_reg->var_off) || 6184 (is_jmp32 && tnum_is_const(dst_lo->var_off))) 6185 reg_set_min_max_inv(&other_branch_regs[insn->src_reg], 6186 src_reg, 6187 is_jmp32 6188 ? dst_lo->var_off.value 6189 : dst_reg->var_off.value, 6190 opcode, is_jmp32); 6191 else if (!is_jmp32 && 6192 (opcode == BPF_JEQ || opcode == BPF_JNE)) 6193 /* Comparing for equality, we can combine knowledge */ 6194 reg_combine_min_max(&other_branch_regs[insn->src_reg], 6195 &other_branch_regs[insn->dst_reg], 6196 src_reg, dst_reg, opcode); 6197 } 6198 } else if (dst_reg->type == SCALAR_VALUE) { 6199 reg_set_min_max(&other_branch_regs[insn->dst_reg], 6200 dst_reg, insn->imm, opcode, is_jmp32); 6201 } 6202 6203 /* detect if R == 0 where R is returned from bpf_map_lookup_elem(). 6204 * NOTE: these optimizations below are related with pointer comparison 6205 * which will never be JMP32. 6206 */ 6207 if (!is_jmp32 && BPF_SRC(insn->code) == BPF_K && 6208 insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) && 6209 reg_type_may_be_null(dst_reg->type)) { 6210 /* Mark all identical registers in each branch as either 6211 * safe or unknown depending R == 0 or R != 0 conditional. 
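 * A hypothetical sequence:
 *   r0 = bpf_map_lookup_elem(...)   // R0->type == PTR_TO_MAP_VALUE_OR_NULL, id == n
 *   r6 = r0                         // r6 inherits the same id n
 *   if (r0 == 0) goto err
 * In the fall-through branch both r0 and r6 become PTR_TO_MAP_VALUE,
 * and in the 'err' branch both become SCALAR_VALUE, since they share
 * the same id.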
6212 */ 6213 mark_ptr_or_null_regs(this_branch, insn->dst_reg, 6214 opcode == BPF_JNE); 6215 mark_ptr_or_null_regs(other_branch, insn->dst_reg, 6216 opcode == BPF_JEQ); 6217 } else if (!try_match_pkt_pointers(insn, dst_reg, ®s[insn->src_reg], 6218 this_branch, other_branch) && 6219 is_pointer_value(env, insn->dst_reg)) { 6220 verbose(env, "R%d pointer comparison prohibited\n", 6221 insn->dst_reg); 6222 return -EACCES; 6223 } 6224 if (env->log.level & BPF_LOG_LEVEL) 6225 print_verifier_state(env, this_branch->frame[this_branch->curframe]); 6226 return 0; 6227 } 6228 6229 /* verify BPF_LD_IMM64 instruction */ 6230 static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn) 6231 { 6232 struct bpf_insn_aux_data *aux = cur_aux(env); 6233 struct bpf_reg_state *regs = cur_regs(env); 6234 struct bpf_map *map; 6235 int err; 6236 6237 if (BPF_SIZE(insn->code) != BPF_DW) { 6238 verbose(env, "invalid BPF_LD_IMM insn\n"); 6239 return -EINVAL; 6240 } 6241 if (insn->off != 0) { 6242 verbose(env, "BPF_LD_IMM64 uses reserved fields\n"); 6243 return -EINVAL; 6244 } 6245 6246 err = check_reg_arg(env, insn->dst_reg, DST_OP); 6247 if (err) 6248 return err; 6249 6250 if (insn->src_reg == 0) { 6251 u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm; 6252 6253 regs[insn->dst_reg].type = SCALAR_VALUE; 6254 __mark_reg_known(®s[insn->dst_reg], imm); 6255 return 0; 6256 } 6257 6258 map = env->used_maps[aux->map_index]; 6259 mark_reg_known_zero(env, regs, insn->dst_reg); 6260 regs[insn->dst_reg].map_ptr = map; 6261 6262 if (insn->src_reg == BPF_PSEUDO_MAP_VALUE) { 6263 regs[insn->dst_reg].type = PTR_TO_MAP_VALUE; 6264 regs[insn->dst_reg].off = aux->map_off; 6265 if (map_value_has_spin_lock(map)) 6266 regs[insn->dst_reg].id = ++env->id_gen; 6267 } else if (insn->src_reg == BPF_PSEUDO_MAP_FD) { 6268 regs[insn->dst_reg].type = CONST_PTR_TO_MAP; 6269 } else { 6270 verbose(env, "bpf verifier is misconfigured\n"); 6271 return -EINVAL; 6272 } 6273 6274 return 0; 6275 } 6276 6277 static bool may_access_skb(enum bpf_prog_type type) 6278 { 6279 switch (type) { 6280 case BPF_PROG_TYPE_SOCKET_FILTER: 6281 case BPF_PROG_TYPE_SCHED_CLS: 6282 case BPF_PROG_TYPE_SCHED_ACT: 6283 return true; 6284 default: 6285 return false; 6286 } 6287 } 6288 6289 /* verify safety of LD_ABS|LD_IND instructions: 6290 * - they can only appear in the programs where ctx == skb 6291 * - since they are wrappers of function calls, they scratch R1-R5 registers, 6292 * preserve R6-R9, and store return value into R0 6293 * 6294 * Implicit input: 6295 * ctx == skb == R6 == CTX 6296 * 6297 * Explicit input: 6298 * SRC == any register 6299 * IMM == 32-bit immediate 6300 * 6301 * Output: 6302 * R0 - 8/16/32-bit skb data converted to cpu endianness 6303 */ 6304 static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn) 6305 { 6306 struct bpf_reg_state *regs = cur_regs(env); 6307 u8 mode = BPF_MODE(insn->code); 6308 int i, err; 6309 6310 if (!may_access_skb(env->prog->type)) { 6311 verbose(env, "BPF_LD_[ABS|IND] instructions not allowed for this program type\n"); 6312 return -EINVAL; 6313 } 6314 6315 if (!env->ops->gen_ld_abs) { 6316 verbose(env, "bpf verifier is misconfigured\n"); 6317 return -EINVAL; 6318 } 6319 6320 if (env->subprog_cnt > 1) { 6321 /* when program has LD_ABS insn JITs and interpreter assume 6322 * that r1 == ctx == skb which is not the case for callees 6323 * that can have arbitrary arguments. 
It's problematic 6324 * for main prog as well since JITs would need to analyze 6325 * all functions in order to make proper register save/restore 6326 * decisions in the main prog. Hence disallow LD_ABS with calls. 6327 */ 6328 verbose(env, "BPF_LD_[ABS|IND] instructions cannot be mixed with bpf-to-bpf calls\n"); 6329 return -EINVAL; 6330 } 6331 6332 if (insn->dst_reg != BPF_REG_0 || insn->off != 0 || 6333 BPF_SIZE(insn->code) == BPF_DW || 6334 (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) { 6335 verbose(env, "BPF_LD_[ABS|IND] uses reserved fields\n"); 6336 return -EINVAL; 6337 } 6338 6339 /* check whether implicit source operand (register R6) is readable */ 6340 err = check_reg_arg(env, BPF_REG_6, SRC_OP); 6341 if (err) 6342 return err; 6343 6344 /* Disallow usage of BPF_LD_[ABS|IND] with reference tracking, as 6345 * gen_ld_abs() may terminate the program at runtime, leading to 6346 * reference leak. 6347 */ 6348 err = check_reference_leak(env); 6349 if (err) { 6350 verbose(env, "BPF_LD_[ABS|IND] cannot be mixed with socket references\n"); 6351 return err; 6352 } 6353 6354 if (env->cur_state->active_spin_lock) { 6355 verbose(env, "BPF_LD_[ABS|IND] cannot be used inside bpf_spin_lock-ed region\n"); 6356 return -EINVAL; 6357 } 6358 6359 if (regs[BPF_REG_6].type != PTR_TO_CTX) { 6360 verbose(env, 6361 "at the time of BPF_LD_ABS|IND R6 != pointer to skb\n"); 6362 return -EINVAL; 6363 } 6364 6365 if (mode == BPF_IND) { 6366 /* check explicit source operand */ 6367 err = check_reg_arg(env, insn->src_reg, SRC_OP); 6368 if (err) 6369 return err; 6370 } 6371 6372 /* reset caller saved regs to unreadable */ 6373 for (i = 0; i < CALLER_SAVED_REGS; i++) { 6374 mark_reg_not_init(env, regs, caller_saved[i]); 6375 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK); 6376 } 6377 6378 /* mark destination R0 register as readable, since it contains 6379 * the value fetched from the packet. 6380 * Already marked as written above. 6381 */ 6382 mark_reg_unknown(env, regs, BPF_REG_0); 6383 /* ld_abs loads up to 32 bits of skb data. */ 6384 regs[BPF_REG_0].subreg_def = env->insn_idx + 1; 6385 return 0; 6386 } 6387 6388 static int check_return_code(struct bpf_verifier_env *env) 6389 { 6390 struct tnum enforce_attach_type_range = tnum_unknown; 6391 const struct bpf_prog *prog = env->prog; 6392 struct bpf_reg_state *reg; 6393 struct tnum range = tnum_range(0, 1); 6394 int err; 6395 6396 /* The struct_ops func-ptr's return type could be "void" */ 6397 if (env->prog->type == BPF_PROG_TYPE_STRUCT_OPS && 6398 !prog->aux->attach_func_proto->type) 6399 return 0; 6400 6401 /* eBPF calling convention is such that R0 is used 6402 * to return the value from eBPF program.
6403 * Make sure that it's readable at this time 6404 * of bpf_exit, which means that program wrote 6405 * something into it earlier 6406 */ 6407 err = check_reg_arg(env, BPF_REG_0, SRC_OP); 6408 if (err) 6409 return err; 6410 6411 if (is_pointer_value(env, BPF_REG_0)) { 6412 verbose(env, "R0 leaks addr as return value\n"); 6413 return -EACCES; 6414 } 6415 6416 switch (env->prog->type) { 6417 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 6418 if (env->prog->expected_attach_type == BPF_CGROUP_UDP4_RECVMSG || 6419 env->prog->expected_attach_type == BPF_CGROUP_UDP6_RECVMSG) 6420 range = tnum_range(1, 1); 6421 break; 6422 case BPF_PROG_TYPE_CGROUP_SKB: 6423 if (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS) { 6424 range = tnum_range(0, 3); 6425 enforce_attach_type_range = tnum_range(2, 3); 6426 } 6427 break; 6428 case BPF_PROG_TYPE_CGROUP_SOCK: 6429 case BPF_PROG_TYPE_SOCK_OPS: 6430 case BPF_PROG_TYPE_CGROUP_DEVICE: 6431 case BPF_PROG_TYPE_CGROUP_SYSCTL: 6432 case BPF_PROG_TYPE_CGROUP_SOCKOPT: 6433 break; 6434 case BPF_PROG_TYPE_RAW_TRACEPOINT: 6435 if (!env->prog->aux->attach_btf_id) 6436 return 0; 6437 range = tnum_const(0); 6438 break; 6439 default: 6440 return 0; 6441 } 6442 6443 reg = cur_regs(env) + BPF_REG_0; 6444 if (reg->type != SCALAR_VALUE) { 6445 verbose(env, "At program exit the register R0 is not a known value (%s)\n", 6446 reg_type_str[reg->type]); 6447 return -EINVAL; 6448 } 6449 6450 if (!tnum_in(range, reg->var_off)) { 6451 char tn_buf[48]; 6452 6453 verbose(env, "At program exit the register R0 "); 6454 if (!tnum_is_unknown(reg->var_off)) { 6455 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); 6456 verbose(env, "has value %s", tn_buf); 6457 } else { 6458 verbose(env, "has unknown scalar value"); 6459 } 6460 tnum_strn(tn_buf, sizeof(tn_buf), range); 6461 verbose(env, " should have been in %s\n", tn_buf); 6462 return -EINVAL; 6463 } 6464 6465 if (!tnum_is_unknown(enforce_attach_type_range) && 6466 tnum_in(enforce_attach_type_range, reg->var_off)) 6467 env->prog->enforce_expected_attach_type = 1; 6468 return 0; 6469 } 6470 6471 /* non-recursive DFS pseudo code 6472 * 1 procedure DFS-iterative(G,v): 6473 * 2 label v as discovered 6474 * 3 let S be a stack 6475 * 4 S.push(v) 6476 * 5 while S is not empty 6477 * 6 t <- S.pop() 6478 * 7 if t is what we're looking for: 6479 * 8 return t 6480 * 9 for all edges e in G.adjacentEdges(t) do 6481 * 10 if edge e is already labelled 6482 * 11 continue with the next edge 6483 * 12 w <- G.adjacentVertex(t,e) 6484 * 13 if vertex w is not discovered and not explored 6485 * 14 label e as tree-edge 6486 * 15 label w as discovered 6487 * 16 S.push(w) 6488 * 17 continue at 5 6489 * 18 else if vertex w is discovered 6490 * 19 label e as back-edge 6491 * 20 else 6492 * 21 // vertex w is explored 6493 * 22 label e as forward- or cross-edge 6494 * 23 label t as explored 6495 * 24 S.pop() 6496 * 6497 * convention: 6498 * 0x10 - discovered 6499 * 0x11 - discovered and fall-through edge labelled 6500 * 0x12 - discovered and fall-through and branch edges labelled 6501 * 0x20 - explored 6502 */ 6503 6504 enum { 6505 DISCOVERED = 0x10, 6506 EXPLORED = 0x20, 6507 FALLTHROUGH = 1, 6508 BRANCH = 2, 6509 }; 6510 6511 static u32 state_htab_size(struct bpf_verifier_env *env) 6512 { 6513 return env->prog->len; 6514 } 6515 6516 static struct bpf_verifier_state_list **explored_state( 6517 struct bpf_verifier_env *env, 6518 int idx) 6519 { 6520 struct bpf_verifier_state *cur = env->cur_state; 6521 struct bpf_func_state *state = cur->frame[cur->curframe]; 6522 6523 
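/* States are hashed on both the instruction index and the callsite, so
 * that states for the same insn reached through different call sites
 * land in different buckets.
 */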
return &env->explored_states[(idx ^ state->callsite) % state_htab_size(env)]; 6524 } 6525 6526 static void init_explored_state(struct bpf_verifier_env *env, int idx) 6527 { 6528 env->insn_aux_data[idx].prune_point = true; 6529 } 6530 6531 /* t, w, e - match pseudo-code above: 6532 * t - index of current instruction 6533 * w - next instruction 6534 * e - edge 6535 */ 6536 static int push_insn(int t, int w, int e, struct bpf_verifier_env *env, 6537 bool loop_ok) 6538 { 6539 int *insn_stack = env->cfg.insn_stack; 6540 int *insn_state = env->cfg.insn_state; 6541 6542 if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH)) 6543 return 0; 6544 6545 if (e == BRANCH && insn_state[t] >= (DISCOVERED | BRANCH)) 6546 return 0; 6547 6548 if (w < 0 || w >= env->prog->len) { 6549 verbose_linfo(env, t, "%d: ", t); 6550 verbose(env, "jump out of range from insn %d to %d\n", t, w); 6551 return -EINVAL; 6552 } 6553 6554 if (e == BRANCH) 6555 /* mark branch target for state pruning */ 6556 init_explored_state(env, w); 6557 6558 if (insn_state[w] == 0) { 6559 /* tree-edge */ 6560 insn_state[t] = DISCOVERED | e; 6561 insn_state[w] = DISCOVERED; 6562 if (env->cfg.cur_stack >= env->prog->len) 6563 return -E2BIG; 6564 insn_stack[env->cfg.cur_stack++] = w; 6565 return 1; 6566 } else if ((insn_state[w] & 0xF0) == DISCOVERED) { 6567 if (loop_ok && env->allow_ptr_leaks) 6568 return 0; 6569 verbose_linfo(env, t, "%d: ", t); 6570 verbose_linfo(env, w, "%d: ", w); 6571 verbose(env, "back-edge from insn %d to %d\n", t, w); 6572 return -EINVAL; 6573 } else if (insn_state[w] == EXPLORED) { 6574 /* forward- or cross-edge */ 6575 insn_state[t] = DISCOVERED | e; 6576 } else { 6577 verbose(env, "insn state internal bug\n"); 6578 return -EFAULT; 6579 } 6580 return 0; 6581 } 6582 6583 /* non-recursive depth-first-search to detect loops in BPF program 6584 * loop == back-edge in directed graph 6585 */ 6586 static int check_cfg(struct bpf_verifier_env *env) 6587 { 6588 struct bpf_insn *insns = env->prog->insnsi; 6589 int insn_cnt = env->prog->len; 6590 int *insn_stack, *insn_state; 6591 int ret = 0; 6592 int i, t; 6593 6594 insn_state = env->cfg.insn_state = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL); 6595 if (!insn_state) 6596 return -ENOMEM; 6597 6598 insn_stack = env->cfg.insn_stack = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL); 6599 if (!insn_stack) { 6600 kvfree(insn_state); 6601 return -ENOMEM; 6602 } 6603 6604 insn_state[0] = DISCOVERED; /* mark 1st insn as discovered */ 6605 insn_stack[0] = 0; /* 0 is the first instruction */ 6606 env->cfg.cur_stack = 1; 6607 6608 peek_stack: 6609 if (env->cfg.cur_stack == 0) 6610 goto check_state; 6611 t = insn_stack[env->cfg.cur_stack - 1]; 6612 6613 if (BPF_CLASS(insns[t].code) == BPF_JMP || 6614 BPF_CLASS(insns[t].code) == BPF_JMP32) { 6615 u8 opcode = BPF_OP(insns[t].code); 6616 6617 if (opcode == BPF_EXIT) { 6618 goto mark_explored; 6619 } else if (opcode == BPF_CALL) { 6620 ret = push_insn(t, t + 1, FALLTHROUGH, env, false); 6621 if (ret == 1) 6622 goto peek_stack; 6623 else if (ret < 0) 6624 goto err_free; 6625 if (t + 1 < insn_cnt) 6626 init_explored_state(env, t + 1); 6627 if (insns[t].src_reg == BPF_PSEUDO_CALL) { 6628 init_explored_state(env, t); 6629 ret = push_insn(t, t + insns[t].imm + 1, BRANCH, 6630 env, false); 6631 if (ret == 1) 6632 goto peek_stack; 6633 else if (ret < 0) 6634 goto err_free; 6635 } 6636 } else if (opcode == BPF_JA) { 6637 if (BPF_SRC(insns[t].code) != BPF_K) { 6638 ret = -EINVAL; 6639 goto err_free; 6640 } 6641 /* unconditional jump with 
single edge */ 6642 ret = push_insn(t, t + insns[t].off + 1, 6643 FALLTHROUGH, env, true); 6644 if (ret == 1) 6645 goto peek_stack; 6646 else if (ret < 0) 6647 goto err_free; 6648 /* unconditional jmp is not a good pruning point, 6649 * but it's marked, since backtracking needs 6650 * to record jmp history in is_state_visited(). 6651 */ 6652 init_explored_state(env, t + insns[t].off + 1); 6653 /* tell verifier to check for equivalent states 6654 * after every call and jump 6655 */ 6656 if (t + 1 < insn_cnt) 6657 init_explored_state(env, t + 1); 6658 } else { 6659 /* conditional jump with two edges */ 6660 init_explored_state(env, t); 6661 ret = push_insn(t, t + 1, FALLTHROUGH, env, true); 6662 if (ret == 1) 6663 goto peek_stack; 6664 else if (ret < 0) 6665 goto err_free; 6666 6667 ret = push_insn(t, t + insns[t].off + 1, BRANCH, env, true); 6668 if (ret == 1) 6669 goto peek_stack; 6670 else if (ret < 0) 6671 goto err_free; 6672 } 6673 } else { 6674 /* all other non-branch instructions with single 6675 * fall-through edge 6676 */ 6677 ret = push_insn(t, t + 1, FALLTHROUGH, env, false); 6678 if (ret == 1) 6679 goto peek_stack; 6680 else if (ret < 0) 6681 goto err_free; 6682 } 6683 6684 mark_explored: 6685 insn_state[t] = EXPLORED; 6686 if (env->cfg.cur_stack-- <= 0) { 6687 verbose(env, "pop stack internal bug\n"); 6688 ret = -EFAULT; 6689 goto err_free; 6690 } 6691 goto peek_stack; 6692 6693 check_state: 6694 for (i = 0; i < insn_cnt; i++) { 6695 if (insn_state[i] != EXPLORED) { 6696 verbose(env, "unreachable insn %d\n", i); 6697 ret = -EINVAL; 6698 goto err_free; 6699 } 6700 } 6701 ret = 0; /* cfg looks good */ 6702 6703 err_free: 6704 kvfree(insn_state); 6705 kvfree(insn_stack); 6706 env->cfg.insn_state = env->cfg.insn_stack = NULL; 6707 return ret; 6708 } 6709 6710 /* The minimum supported BTF func info size */ 6711 #define MIN_BPF_FUNCINFO_SIZE 8 6712 #define MAX_FUNCINFO_REC_SIZE 252 6713 6714 static int check_btf_func(struct bpf_verifier_env *env, 6715 const union bpf_attr *attr, 6716 union bpf_attr __user *uattr) 6717 { 6718 u32 i, nfuncs, urec_size, min_size; 6719 u32 krec_size = sizeof(struct bpf_func_info); 6720 struct bpf_func_info *krecord; 6721 struct bpf_func_info_aux *info_aux = NULL; 6722 const struct btf_type *type; 6723 struct bpf_prog *prog; 6724 const struct btf *btf; 6725 void __user *urecord; 6726 u32 prev_offset = 0; 6727 int ret = 0; 6728 6729 nfuncs = attr->func_info_cnt; 6730 if (!nfuncs) 6731 return 0; 6732 6733 if (nfuncs != env->subprog_cnt) { 6734 verbose(env, "number of funcs in func_info doesn't match number of subprogs\n"); 6735 return -EINVAL; 6736 } 6737 6738 urec_size = attr->func_info_rec_size; 6739 if (urec_size < MIN_BPF_FUNCINFO_SIZE || 6740 urec_size > MAX_FUNCINFO_REC_SIZE || 6741 urec_size % sizeof(u32)) { 6742 verbose(env, "invalid func info rec size %u\n", urec_size); 6743 return -EINVAL; 6744 } 6745 6746 prog = env->prog; 6747 btf = prog->aux->btf; 6748 6749 urecord = u64_to_user_ptr(attr->func_info); 6750 min_size = min_t(u32, krec_size, urec_size); 6751 6752 krecord = kvcalloc(nfuncs, krec_size, GFP_KERNEL | __GFP_NOWARN); 6753 if (!krecord) 6754 return -ENOMEM; 6755 info_aux = kcalloc(nfuncs, sizeof(*info_aux), GFP_KERNEL | __GFP_NOWARN); 6756 if (!info_aux) 6757 goto err_free; 6758 6759 for (i = 0; i < nfuncs; i++) { 6760 ret = bpf_check_uarg_tail_zero(urecord, krec_size, urec_size); 6761 if (ret) { 6762 if (ret == -E2BIG) { 6763 verbose(env, "nonzero tailing record in func info"); 6764 /* set the size kernel expects so loader can zero 6765 * 
out the rest of the record. 6766 */ 6767 if (put_user(min_size, &uattr->func_info_rec_size)) 6768 ret = -EFAULT; 6769 } 6770 goto err_free; 6771 } 6772 6773 if (copy_from_user(&krecord[i], urecord, min_size)) { 6774 ret = -EFAULT; 6775 goto err_free; 6776 } 6777 6778 /* check insn_off */ 6779 if (i == 0) { 6780 if (krecord[i].insn_off) { 6781 verbose(env, 6782 "nonzero insn_off %u for the first func info record", 6783 krecord[i].insn_off); 6784 ret = -EINVAL; 6785 goto err_free; 6786 } 6787 } else if (krecord[i].insn_off <= prev_offset) { 6788 verbose(env, 6789 "same or smaller insn offset (%u) than previous func info record (%u)", 6790 krecord[i].insn_off, prev_offset); 6791 ret = -EINVAL; 6792 goto err_free; 6793 } 6794 6795 if (env->subprog_info[i].start != krecord[i].insn_off) { 6796 verbose(env, "func_info BTF section doesn't match subprog layout in BPF program\n"); 6797 ret = -EINVAL; 6798 goto err_free; 6799 } 6800 6801 /* check type_id */ 6802 type = btf_type_by_id(btf, krecord[i].type_id); 6803 if (!type || !btf_type_is_func(type)) { 6804 verbose(env, "invalid type id %d in func info", 6805 krecord[i].type_id); 6806 ret = -EINVAL; 6807 goto err_free; 6808 } 6809 info_aux[i].linkage = BTF_INFO_VLEN(type->info); 6810 prev_offset = krecord[i].insn_off; 6811 urecord += urec_size; 6812 } 6813 6814 prog->aux->func_info = krecord; 6815 prog->aux->func_info_cnt = nfuncs; 6816 prog->aux->func_info_aux = info_aux; 6817 return 0; 6818 6819 err_free: 6820 kvfree(krecord); 6821 kfree(info_aux); 6822 return ret; 6823 } 6824 6825 static void adjust_btf_func(struct bpf_verifier_env *env) 6826 { 6827 struct bpf_prog_aux *aux = env->prog->aux; 6828 int i; 6829 6830 if (!aux->func_info) 6831 return; 6832 6833 for (i = 0; i < env->subprog_cnt; i++) 6834 aux->func_info[i].insn_off = env->subprog_info[i].start; 6835 } 6836 6837 #define MIN_BPF_LINEINFO_SIZE (offsetof(struct bpf_line_info, line_col) + \ 6838 sizeof(((struct bpf_line_info *)(0))->line_col)) 6839 #define MAX_LINEINFO_REC_SIZE MAX_FUNCINFO_REC_SIZE 6840 6841 static int check_btf_line(struct bpf_verifier_env *env, 6842 const union bpf_attr *attr, 6843 union bpf_attr __user *uattr) 6844 { 6845 u32 i, s, nr_linfo, ncopy, expected_size, rec_size, prev_offset = 0; 6846 struct bpf_subprog_info *sub; 6847 struct bpf_line_info *linfo; 6848 struct bpf_prog *prog; 6849 const struct btf *btf; 6850 void __user *ulinfo; 6851 int err; 6852 6853 nr_linfo = attr->line_info_cnt; 6854 if (!nr_linfo) 6855 return 0; 6856 6857 rec_size = attr->line_info_rec_size; 6858 if (rec_size < MIN_BPF_LINEINFO_SIZE || 6859 rec_size > MAX_LINEINFO_REC_SIZE || 6860 rec_size & (sizeof(u32) - 1)) 6861 return -EINVAL; 6862 6863 /* Need to zero it in case the userspace may 6864 * pass in a smaller bpf_line_info object. 
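 * copy_from_user() below copies only ncopy bytes per record (the smaller
 * of the kernel and user record sizes), so any remaining kernel-side
 * bytes must already read as zero, which kvcalloc() guarantees.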
6865 */ 6866 linfo = kvcalloc(nr_linfo, sizeof(struct bpf_line_info), 6867 GFP_KERNEL | __GFP_NOWARN); 6868 if (!linfo) 6869 return -ENOMEM; 6870 6871 prog = env->prog; 6872 btf = prog->aux->btf; 6873 6874 s = 0; 6875 sub = env->subprog_info; 6876 ulinfo = u64_to_user_ptr(attr->line_info); 6877 expected_size = sizeof(struct bpf_line_info); 6878 ncopy = min_t(u32, expected_size, rec_size); 6879 for (i = 0; i < nr_linfo; i++) { 6880 err = bpf_check_uarg_tail_zero(ulinfo, expected_size, rec_size); 6881 if (err) { 6882 if (err == -E2BIG) { 6883 verbose(env, "nonzero tailing record in line_info"); 6884 if (put_user(expected_size, 6885 &uattr->line_info_rec_size)) 6886 err = -EFAULT; 6887 } 6888 goto err_free; 6889 } 6890 6891 if (copy_from_user(&linfo[i], ulinfo, ncopy)) { 6892 err = -EFAULT; 6893 goto err_free; 6894 } 6895 6896 /* 6897 * Check insn_off to ensure 6898 * 1) strictly increasing AND 6899 * 2) bounded by prog->len 6900 * 6901 * The linfo[0].insn_off == 0 check logically falls into 6902 * the later "missing bpf_line_info for func..." case 6903 * because the first linfo[0].insn_off must be the 6904 * first sub also and the first sub must have 6905 * subprog_info[0].start == 0. 6906 */ 6907 if ((i && linfo[i].insn_off <= prev_offset) || 6908 linfo[i].insn_off >= prog->len) { 6909 verbose(env, "Invalid line_info[%u].insn_off:%u (prev_offset:%u prog->len:%u)\n", 6910 i, linfo[i].insn_off, prev_offset, 6911 prog->len); 6912 err = -EINVAL; 6913 goto err_free; 6914 } 6915 6916 if (!prog->insnsi[linfo[i].insn_off].code) { 6917 verbose(env, 6918 "Invalid insn code at line_info[%u].insn_off\n", 6919 i); 6920 err = -EINVAL; 6921 goto err_free; 6922 } 6923 6924 if (!btf_name_by_offset(btf, linfo[i].line_off) || 6925 !btf_name_by_offset(btf, linfo[i].file_name_off)) { 6926 verbose(env, "Invalid line_info[%u].line_off or .file_name_off\n", i); 6927 err = -EINVAL; 6928 goto err_free; 6929 } 6930 6931 if (s != env->subprog_cnt) { 6932 if (linfo[i].insn_off == sub[s].start) { 6933 sub[s].linfo_idx = i; 6934 s++; 6935 } else if (sub[s].start < linfo[i].insn_off) { 6936 verbose(env, "missing bpf_line_info for func#%u\n", s); 6937 err = -EINVAL; 6938 goto err_free; 6939 } 6940 } 6941 6942 prev_offset = linfo[i].insn_off; 6943 ulinfo += rec_size; 6944 } 6945 6946 if (s != env->subprog_cnt) { 6947 verbose(env, "missing bpf_line_info for %u funcs starting from func#%u\n", 6948 env->subprog_cnt - s, s); 6949 err = -EINVAL; 6950 goto err_free; 6951 } 6952 6953 prog->aux->linfo = linfo; 6954 prog->aux->nr_linfo = nr_linfo; 6955 6956 return 0; 6957 6958 err_free: 6959 kvfree(linfo); 6960 return err; 6961 } 6962 6963 static int check_btf_info(struct bpf_verifier_env *env, 6964 const union bpf_attr *attr, 6965 union bpf_attr __user *uattr) 6966 { 6967 struct btf *btf; 6968 int err; 6969 6970 if (!attr->func_info_cnt && !attr->line_info_cnt) 6971 return 0; 6972 6973 btf = btf_get_by_fd(attr->prog_btf_fd); 6974 if (IS_ERR(btf)) 6975 return PTR_ERR(btf); 6976 env->prog->aux->btf = btf; 6977 6978 err = check_btf_func(env, attr, uattr); 6979 if (err) 6980 return err; 6981 6982 err = check_btf_line(env, attr, uattr); 6983 if (err) 6984 return err; 6985 6986 return 0; 6987 } 6988 6989 /* check %cur's range satisfies %old's */ 6990 static bool range_within(struct bpf_reg_state *old, 6991 struct bpf_reg_state *cur) 6992 { 6993 return old->umin_value <= cur->umin_value && 6994 old->umax_value >= cur->umax_value && 6995 old->smin_value <= cur->smin_value && 6996 old->smax_value >= cur->smax_value; 6997 } 6998 6999 /* 
Maximum number of register states that can exist at once */ 7000 #define ID_MAP_SIZE (MAX_BPF_REG + MAX_BPF_STACK / BPF_REG_SIZE) 7001 struct idpair { 7002 u32 old; 7003 u32 cur; 7004 }; 7005 7006 /* If in the old state two registers had the same id, then they need to have 7007 * the same id in the new state as well. But that id could be different from 7008 * the old state, so we need to track the mapping from old to new ids. 7009 * Once we have seen that, say, a reg with old id 5 had new id 9, any subsequent 7010 * regs with old id 5 must also have new id 9 for the new state to be safe. But 7011 * regs with a different old id could still have new id 9, we don't care about 7012 * that. 7013 * So we look through our idmap to see if this old id has been seen before. If 7014 * so, we require the new id to match; otherwise, we add the id pair to the map. 7015 */ 7016 static bool check_ids(u32 old_id, u32 cur_id, struct idpair *idmap) 7017 { 7018 unsigned int i; 7019 7020 for (i = 0; i < ID_MAP_SIZE; i++) { 7021 if (!idmap[i].old) { 7022 /* Reached an empty slot; haven't seen this id before */ 7023 idmap[i].old = old_id; 7024 idmap[i].cur = cur_id; 7025 return true; 7026 } 7027 if (idmap[i].old == old_id) 7028 return idmap[i].cur == cur_id; 7029 } 7030 /* We ran out of idmap slots, which should be impossible */ 7031 WARN_ON_ONCE(1); 7032 return false; 7033 } 7034 7035 static void clean_func_state(struct bpf_verifier_env *env, 7036 struct bpf_func_state *st) 7037 { 7038 enum bpf_reg_liveness live; 7039 int i, j; 7040 7041 for (i = 0; i < BPF_REG_FP; i++) { 7042 live = st->regs[i].live; 7043 /* liveness must not touch this register anymore */ 7044 st->regs[i].live |= REG_LIVE_DONE; 7045 if (!(live & REG_LIVE_READ)) 7046 /* since the register is unused, clear its state 7047 * to make further comparison simpler 7048 */ 7049 __mark_reg_not_init(env, &st->regs[i]); 7050 } 7051 7052 for (i = 0; i < st->allocated_stack / BPF_REG_SIZE; i++) { 7053 live = st->stack[i].spilled_ptr.live; 7054 /* liveness must not touch this stack slot anymore */ 7055 st->stack[i].spilled_ptr.live |= REG_LIVE_DONE; 7056 if (!(live & REG_LIVE_READ)) { 7057 __mark_reg_not_init(env, &st->stack[i].spilled_ptr); 7058 for (j = 0; j < BPF_REG_SIZE; j++) 7059 st->stack[i].slot_type[j] = STACK_INVALID; 7060 } 7061 } 7062 } 7063 7064 static void clean_verifier_state(struct bpf_verifier_env *env, 7065 struct bpf_verifier_state *st) 7066 { 7067 int i; 7068 7069 if (st->frame[0]->regs[0].live & REG_LIVE_DONE) 7070 /* all regs in this state in all frames were already marked */ 7071 return; 7072 7073 for (i = 0; i <= st->curframe; i++) 7074 clean_func_state(env, st->frame[i]); 7075 } 7076 7077 /* the parentage chains form a tree. 7078 * the verifier states are added to state lists at given insn and 7079 * pushed into state stack for future exploration. 7080 * when the verifier reaches bpf_exit insn some of the verifer states 7081 * stored in the state lists have their final liveness state already, 7082 * but a lot of states will get revised from liveness point of view when 7083 * the verifier explores other branches. 7084 * Example: 7085 * 1: r0 = 1 7086 * 2: if r1 == 100 goto pc+1 7087 * 3: r0 = 2 7088 * 4: exit 7089 * when the verifier reaches exit insn the register r0 in the state list of 7090 * insn 2 will be seen as !REG_LIVE_READ. Then the verifier pops the other_branch 7091 * of insn 2 and goes exploring further. At the insn 4 it will walk the 7092 * parentage chain from insn 4 into insn 2 and will mark r0 as REG_LIVE_READ. 
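 * The store to r0 at insn 3 is what screens the read at insn 4 off from
 * insn 2 on the fall-through path; only the branch path (2 -> 4) has no
 * intervening write, so only it propagates the r0 read back to the
 * state of insn 2.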
7093 * 7094 * Since the verifier pushes the branch states as it sees them while exploring 7095 * the program, the condition of walking the branch instruction for the second 7096 * time means that all states below this branch were already explored and 7097 * their final liveness marks are already propagated. 7098 * Hence when the verifier completes the search of state list in is_state_visited() 7099 * we can call this clean_live_states() function to mark all liveness states 7100 * as REG_LIVE_DONE to indicate that 'parent' pointers of 'struct bpf_reg_state' 7101 * will not be used. 7102 * This function also clears the registers and stack for states that !READ 7103 * to simplify state merging. 7104 * 7105 * An important note here is that walking the same branch instruction in the callee 7106 * doesn't mean that the states are DONE. The verifier has to compare 7107 * the callsites. 7108 */ 7109 static void clean_live_states(struct bpf_verifier_env *env, int insn, 7110 struct bpf_verifier_state *cur) 7111 { 7112 struct bpf_verifier_state_list *sl; 7113 int i; 7114 7115 sl = *explored_state(env, insn); 7116 while (sl) { 7117 if (sl->state.branches) 7118 goto next; 7119 if (sl->state.insn_idx != insn || 7120 sl->state.curframe != cur->curframe) 7121 goto next; 7122 for (i = 0; i <= cur->curframe; i++) 7123 if (sl->state.frame[i]->callsite != cur->frame[i]->callsite) 7124 goto next; 7125 clean_verifier_state(env, &sl->state); 7126 next: 7127 sl = sl->next; 7128 } 7129 } 7130 7131 /* Returns true if (rold safe implies rcur safe) */ 7132 static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur, 7133 struct idpair *idmap) 7134 { 7135 bool equal; 7136 7137 if (!(rold->live & REG_LIVE_READ)) 7138 /* explored state didn't use this */ 7139 return true; 7140 7141 equal = memcmp(rold, rcur, offsetof(struct bpf_reg_state, parent)) == 0; 7142 7143 if (rold->type == PTR_TO_STACK) 7144 /* two stack pointers are equal only if they're pointing to 7145 * the same stack frame, since fp-8 in foo != fp-8 in bar 7146 */ 7147 return equal && rold->frameno == rcur->frameno; 7148 7149 if (equal) 7150 return true; 7151 7152 if (rold->type == NOT_INIT) 7153 /* explored state can't have used this */ 7154 return true; 7155 if (rcur->type == NOT_INIT) 7156 return false; 7157 switch (rold->type) { 7158 case SCALAR_VALUE: 7159 if (rcur->type == SCALAR_VALUE) { 7160 if (!rold->precise && !rcur->precise) 7161 return true; 7162 /* new val must satisfy old val knowledge */ 7163 return range_within(rold, rcur) && 7164 tnum_in(rold->var_off, rcur->var_off); 7165 } else { 7166 /* We're trying to use a pointer in place of a scalar. 7167 * Even if the scalar was unbounded, this could lead to 7168 * pointer leaks because scalars are allowed to leak 7169 * while pointers are not. We could make this safe in 7170 * special cases if root is calling us, but it's 7171 * probably not worth the hassle. 7172 */ 7173 return false; 7174 } 7175 case PTR_TO_MAP_VALUE: 7176 /* If the new min/max/var_off satisfy the old ones and 7177 * everything else matches, we are OK.
7178 * 'id' is not compared, since it's only used for maps with 7179 * bpf_spin_lock inside map element and in such cases if 7180 * the rest of the prog is valid for one map element then 7181 * it's valid for all map elements regardless of the key 7182 * used in bpf_map_lookup() 7183 */ 7184 return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 && 7185 range_within(rold, rcur) && 7186 tnum_in(rold->var_off, rcur->var_off); 7187 case PTR_TO_MAP_VALUE_OR_NULL: 7188 /* a PTR_TO_MAP_VALUE could be safe to use as a 7189 * PTR_TO_MAP_VALUE_OR_NULL into the same map. 7190 * However, if the old PTR_TO_MAP_VALUE_OR_NULL then got NULL- 7191 * checked, doing so could have affected others with the same 7192 * id, and we can't check for that because we lost the id when 7193 * we converted to a PTR_TO_MAP_VALUE. 7194 */ 7195 if (rcur->type != PTR_TO_MAP_VALUE_OR_NULL) 7196 return false; 7197 if (memcmp(rold, rcur, offsetof(struct bpf_reg_state, id))) 7198 return false; 7199 /* Check our ids match any regs they're supposed to */ 7200 return check_ids(rold->id, rcur->id, idmap); 7201 case PTR_TO_PACKET_META: 7202 case PTR_TO_PACKET: 7203 if (rcur->type != rold->type) 7204 return false; 7205 /* We must have at least as much range as the old ptr 7206 * did, so that any accesses which were safe before are 7207 * still safe. This is true even if old range < old off, 7208 * since someone could have accessed through (ptr - k), or 7209 * even done ptr -= k in a register, to get a safe access. 7210 */ 7211 if (rold->range > rcur->range) 7212 return false; 7213 /* If the offsets don't match, we can't trust our alignment; 7214 * nor can we be sure that we won't fall out of range. 7215 */ 7216 if (rold->off != rcur->off) 7217 return false; 7218 /* id relations must be preserved */ 7219 if (rold->id && !check_ids(rold->id, rcur->id, idmap)) 7220 return false; 7221 /* new val must satisfy old val knowledge */ 7222 return range_within(rold, rcur) && 7223 tnum_in(rold->var_off, rcur->var_off); 7224 case PTR_TO_CTX: 7225 case CONST_PTR_TO_MAP: 7226 case PTR_TO_PACKET_END: 7227 case PTR_TO_FLOW_KEYS: 7228 case PTR_TO_SOCKET: 7229 case PTR_TO_SOCKET_OR_NULL: 7230 case PTR_TO_SOCK_COMMON: 7231 case PTR_TO_SOCK_COMMON_OR_NULL: 7232 case PTR_TO_TCP_SOCK: 7233 case PTR_TO_TCP_SOCK_OR_NULL: 7234 case PTR_TO_XDP_SOCK: 7235 /* Only valid matches are exact, which memcmp() above 7236 * would have accepted 7237 */ 7238 default: 7239 /* Don't know what's going on, just say it's not safe */ 7240 return false; 7241 } 7242 7243 /* Shouldn't get here; if we do, say it's not safe */ 7244 WARN_ON_ONCE(1); 7245 return false; 7246 } 7247 7248 static bool stacksafe(struct bpf_func_state *old, 7249 struct bpf_func_state *cur, 7250 struct idpair *idmap) 7251 { 7252 int i, spi; 7253 7254 /* walk slots of the explored stack and ignore any additional 7255 * slots in the current stack, since explored(safe) state 7256 * didn't use them 7257 */ 7258 for (i = 0; i < old->allocated_stack; i++) { 7259 spi = i / BPF_REG_SIZE; 7260 7261 if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ)) { 7262 i += BPF_REG_SIZE - 1; 7263 /* explored state didn't use this */ 7264 continue; 7265 } 7266 7267 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID) 7268 continue; 7269 7270 /* explored stack has more populated slots than current stack 7271 * and these slots were used 7272 */ 7273 if (i >= cur->allocated_stack) 7274 return false; 7275 7276 /* if old state was safe with misc data in the stack 7277 * it will be safe with 
zero-initialized stack. 7278 * The opposite is not true 7279 */ 7280 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC && 7281 cur->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_ZERO) 7282 continue; 7283 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] != 7284 cur->stack[spi].slot_type[i % BPF_REG_SIZE]) 7285 /* Ex: old explored (safe) state has STACK_SPILL in 7286 * this stack slot, but current has STACK_MISC -> 7287 * these verifier states are not equivalent, 7288 * return false to continue verification of this path 7289 */ 7290 return false; 7291 if (i % BPF_REG_SIZE) 7292 continue; 7293 if (old->stack[spi].slot_type[0] != STACK_SPILL) 7294 continue; 7295 if (!regsafe(&old->stack[spi].spilled_ptr, 7296 &cur->stack[spi].spilled_ptr, 7297 idmap)) 7298 /* when explored and current stack slot are both storing 7299 * spilled registers, check that stored pointer types 7300 * are the same as well. 7301 * Ex: explored safe path could have stored 7302 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8} 7303 * but current path has stored: 7304 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16} 7305 * such verifier states are not equivalent. 7306 * return false to continue verification of this path 7307 */ 7308 return false; 7309 } 7310 return true; 7311 } 7312 7313 static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur) 7314 { 7315 if (old->acquired_refs != cur->acquired_refs) 7316 return false; 7317 return !memcmp(old->refs, cur->refs, 7318 sizeof(*old->refs) * old->acquired_refs); 7319 } 7320 7321 /* compare two verifier states 7322 * 7323 * all states stored in state_list are known to be valid, since 7324 * verifier reached 'bpf_exit' instruction through them 7325 * 7326 * this function is called when the verifier is exploring different branches of 7327 * execution popped from the state stack. If it sees an old state that has 7328 * more strict register state and more strict stack state, then this execution 7329 * branch doesn't need to be explored further, since verifier already 7330 * concluded that more strict state leads to a valid finish. 7331 * 7332 * Therefore two states are equivalent if register state is more conservative 7333 * and explored stack state is more conservative than the current one. 7334 * Example: 7335 * explored current 7336 * (slot1=INV slot2=MISC) == (slot1=MISC slot2=MISC) 7337 * (slot1=MISC slot2=MISC) != (slot1=INV slot2=MISC) 7338 * 7339 * In other words, if the current stack state (the one being explored) has more 7340 * valid slots than the old one that already passed validation, it means 7341 * the verifier can stop exploring and conclude that the current state is valid too 7342 * 7343 * Similarly with registers.
If explored state has register type as invalid 7344 * whereas register type in current state is meaningful, it means that 7345 * the current state will reach 'bpf_exit' instruction safely 7346 */ 7347 static bool func_states_equal(struct bpf_func_state *old, 7348 struct bpf_func_state *cur) 7349 { 7350 struct idpair *idmap; 7351 bool ret = false; 7352 int i; 7353 7354 idmap = kcalloc(ID_MAP_SIZE, sizeof(struct idpair), GFP_KERNEL); 7355 /* If we failed to allocate the idmap, just say it's not safe */ 7356 if (!idmap) 7357 return false; 7358 7359 for (i = 0; i < MAX_BPF_REG; i++) { 7360 if (!regsafe(&old->regs[i], &cur->regs[i], idmap)) 7361 goto out_free; 7362 } 7363 7364 if (!stacksafe(old, cur, idmap)) 7365 goto out_free; 7366 7367 if (!refsafe(old, cur)) 7368 goto out_free; 7369 ret = true; 7370 out_free: 7371 kfree(idmap); 7372 return ret; 7373 } 7374 7375 static bool states_equal(struct bpf_verifier_env *env, 7376 struct bpf_verifier_state *old, 7377 struct bpf_verifier_state *cur) 7378 { 7379 int i; 7380 7381 if (old->curframe != cur->curframe) 7382 return false; 7383 7384 /* Verification state from speculative execution simulation 7385 * must never prune a non-speculative execution one. 7386 */ 7387 if (old->speculative && !cur->speculative) 7388 return false; 7389 7390 if (old->active_spin_lock != cur->active_spin_lock) 7391 return false; 7392 7393 /* for states to be equal callsites have to be the same 7394 * and all frame states need to be equivalent 7395 */ 7396 for (i = 0; i <= old->curframe; i++) { 7397 if (old->frame[i]->callsite != cur->frame[i]->callsite) 7398 return false; 7399 if (!func_states_equal(old->frame[i], cur->frame[i])) 7400 return false; 7401 } 7402 return true; 7403 } 7404 7405 /* Return 0 if no propagation happened. Return negative error code if error 7406 * happened. Otherwise, return the propagated bit. 7407 */ 7408 static int propagate_liveness_reg(struct bpf_verifier_env *env, 7409 struct bpf_reg_state *reg, 7410 struct bpf_reg_state *parent_reg) 7411 { 7412 u8 parent_flag = parent_reg->live & REG_LIVE_READ; 7413 u8 flag = reg->live & REG_LIVE_READ; 7414 int err; 7415 7416 /* When comes here, read flags of PARENT_REG or REG could be any of 7417 * REG_LIVE_READ64, REG_LIVE_READ32, REG_LIVE_NONE. There is no need 7418 * of propagation if PARENT_REG has strongest REG_LIVE_READ64. 7419 */ 7420 if (parent_flag == REG_LIVE_READ64 || 7421 /* Or if there is no read flag from REG. */ 7422 !flag || 7423 /* Or if the read flag from REG is the same as PARENT_REG. */ 7424 parent_flag == flag) 7425 return 0; 7426 7427 err = mark_reg_read(env, reg, parent_reg, flag); 7428 if (err) 7429 return err; 7430 7431 return flag; 7432 } 7433 7434 /* A write screens off any subsequent reads; but write marks come from the 7435 * straight-line code between a state and its parent. When we arrive at an 7436 * equivalent state (jump target or such) we didn't arrive by the straight-line 7437 * code, so read marks in the state must propagate to the parent regardless 7438 * of the state's write marks. That's what 'parent == state->parent' comparison 7439 * in mark_reg_read() is for. 
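 *
 * Illustrative sketch (simplified): suppose the explored state S stored at a
 * jump target has r6 marked both REG_LIVE_WRITTEN (the straight-line code
 * between S and its parent wrote r6) and REG_LIVE_READ (the already-verified
 * continuation after S read r6). When another path reaches the same insn via
 * a jump and is pruned against S, the read mark on r6 must still be copied
 * into the pruned state and up its own parentage chain, because that path
 * never executed the write that produced S's write mark.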
7440 */ 7441 static int propagate_liveness(struct bpf_verifier_env *env, 7442 const struct bpf_verifier_state *vstate, 7443 struct bpf_verifier_state *vparent) 7444 { 7445 struct bpf_reg_state *state_reg, *parent_reg; 7446 struct bpf_func_state *state, *parent; 7447 int i, frame, err = 0; 7448 7449 if (vparent->curframe != vstate->curframe) { 7450 WARN(1, "propagate_live: parent frame %d current frame %d\n", 7451 vparent->curframe, vstate->curframe); 7452 return -EFAULT; 7453 } 7454 /* Propagate read liveness of registers... */ 7455 BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG); 7456 for (frame = 0; frame <= vstate->curframe; frame++) { 7457 parent = vparent->frame[frame]; 7458 state = vstate->frame[frame]; 7459 parent_reg = parent->regs; 7460 state_reg = state->regs; 7461 /* We don't need to worry about FP liveness, it's read-only */ 7462 for (i = frame < vstate->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) { 7463 err = propagate_liveness_reg(env, &state_reg[i], 7464 &parent_reg[i]); 7465 if (err < 0) 7466 return err; 7467 if (err == REG_LIVE_READ64) 7468 mark_insn_zext(env, &parent_reg[i]); 7469 } 7470 7471 /* Propagate stack slots. */ 7472 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE && 7473 i < parent->allocated_stack / BPF_REG_SIZE; i++) { 7474 parent_reg = &parent->stack[i].spilled_ptr; 7475 state_reg = &state->stack[i].spilled_ptr; 7476 err = propagate_liveness_reg(env, state_reg, 7477 parent_reg); 7478 if (err < 0) 7479 return err; 7480 } 7481 } 7482 return 0; 7483 } 7484 7485 /* find precise scalars in the previous equivalent state and 7486 * propagate them into the current state 7487 */ 7488 static int propagate_precision(struct bpf_verifier_env *env, 7489 const struct bpf_verifier_state *old) 7490 { 7491 struct bpf_reg_state *state_reg; 7492 struct bpf_func_state *state; 7493 int i, err = 0; 7494 7495 state = old->frame[old->curframe]; 7496 state_reg = state->regs; 7497 for (i = 0; i < BPF_REG_FP; i++, state_reg++) { 7498 if (state_reg->type != SCALAR_VALUE || 7499 !state_reg->precise) 7500 continue; 7501 if (env->log.level & BPF_LOG_LEVEL2) 7502 verbose(env, "propagating r%d\n", i); 7503 err = mark_chain_precision(env, i); 7504 if (err < 0) 7505 return err; 7506 } 7507 7508 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) { 7509 if (state->stack[i].slot_type[0] != STACK_SPILL) 7510 continue; 7511 state_reg = &state->stack[i].spilled_ptr; 7512 if (state_reg->type != SCALAR_VALUE || 7513 !state_reg->precise) 7514 continue; 7515 if (env->log.level & BPF_LOG_LEVEL2) 7516 verbose(env, "propagating fp%d\n", 7517 (-i - 1) * BPF_REG_SIZE); 7518 err = mark_chain_precision_stack(env, i); 7519 if (err < 0) 7520 return err; 7521 } 7522 return 0; 7523 } 7524 7525 static bool states_maybe_looping(struct bpf_verifier_state *old, 7526 struct bpf_verifier_state *cur) 7527 { 7528 struct bpf_func_state *fold, *fcur; 7529 int i, fr = cur->curframe; 7530 7531 if (old->curframe != fr) 7532 return false; 7533 7534 fold = old->frame[fr]; 7535 fcur = cur->frame[fr]; 7536 for (i = 0; i < MAX_BPF_REG; i++) 7537 if (memcmp(&fold->regs[i], &fcur->regs[i], 7538 offsetof(struct bpf_reg_state, parent))) 7539 return false; 7540 return true; 7541 } 7542 7543 7544 static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) 7545 { 7546 struct bpf_verifier_state_list *new_sl; 7547 struct bpf_verifier_state_list *sl, **pprev; 7548 struct bpf_verifier_state *cur = env->cur_state, *new; 7549 int i, j, err, states_cnt = 0; 7550 bool add_new_state = env->test_state_freq ? 
true : false;
7551
7552 cur->last_insn_idx = env->prev_insn_idx;
7553 if (!env->insn_aux_data[insn_idx].prune_point)
7554 /* this 'insn_idx' instruction wasn't marked, so we will not
7555 * be doing state search here
7556 */
7557 return 0;
7558
7559 /* bpf progs typically have a pruning point every 4 instructions
7560 * http://vger.kernel.org/bpfconf2019.html#session-1
7561 * Do not add new state for future pruning if the verifier hasn't seen
7562 * at least 2 jumps and at least 8 instructions.
7563 * This heuristic helps decrease the 'total_states' and 'peak_states' metrics.
7564 * In tests that amounts to up to a 50% reduction in total verifier
7565 * memory consumption and a 20% verifier time speedup.
7566 */
7567 if (env->jmps_processed - env->prev_jmps_processed >= 2 &&
7568 env->insn_processed - env->prev_insn_processed >= 8)
7569 add_new_state = true;
7570
7571 pprev = explored_state(env, insn_idx);
7572 sl = *pprev;
7573
7574 clean_live_states(env, insn_idx, cur);
7575
7576 while (sl) {
7577 states_cnt++;
7578 if (sl->state.insn_idx != insn_idx)
7579 goto next;
7580 if (sl->state.branches) {
7581 if (states_maybe_looping(&sl->state, cur) &&
7582 states_equal(env, &sl->state, cur)) {
7583 verbose_linfo(env, insn_idx, "; ");
7584 verbose(env, "infinite loop detected at insn %d\n", insn_idx);
7585 return -EINVAL;
7586 }
7587 /* if the verifier is processing a loop, avoid adding a new state
7588 * too often, since different loop iterations have distinct
7589 * states and may not help future pruning.
7590 * This threshold shouldn't be too low, to make sure that
7591 * a loop with a large bound will be rejected quickly.
7592 * The most abusive loop will be:
7593 * r1 += 1
7594 * if r1 < 1000000 goto pc-2
7595 * 1M insn_processed limit / 100 == 10k peak states.
7596 * This threshold shouldn't be too high either, since states
7597 * at the end of the loop are likely to be useful in pruning.
7598 */
7599 if (env->jmps_processed - env->prev_jmps_processed < 20 &&
7600 env->insn_processed - env->prev_insn_processed < 100)
7601 add_new_state = false;
7602 goto miss;
7603 }
7604 if (states_equal(env, &sl->state, cur)) {
7605 sl->hit_cnt++;
7606 /* reached equivalent register/stack state,
7607 * prune the search.
7608 * Registers read by the continuation are read by us.
7609 * If we have any write marks in env->cur_state, they
7610 * will prevent corresponding reads in the continuation
7611 * from reaching our parent (an explored_state). Our
7612 * own state will get the read marks recorded, but
7613 * they'll be immediately forgotten as we're pruning
7614 * this state and will pop a new one.
7615 */
7616 err = propagate_liveness(env, &sl->state, cur);
7617
7618 /* if the previous state reached the exit with precision and the
7619 * current state is equivalent to it (except precision marks),
7620 * the precision needs to be propagated back into
7621 * the current state.
7622 */
7623 err = err ? : push_jmp_history(env, cur);
7624 err = err ? : propagate_precision(env, &sl->state);
7625 if (err)
7626 return err;
7627 return 1;
7628 }
7629 miss:
7630 /* when a new state is not going to be added, do not increase the miss
7631 * count. Otherwise several loop iterations will remove the state
7632 * recorded earlier. The goal of these heuristics is to keep states
7633 * from some iterations of the loop (some near the beginning
7634 * and some near the end) to help pruning.
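 *
 * For example, with the drop heuristic below (miss_cnt > hit_cnt * 3 + 3),
 * a state that was missed 10 times but hit only twice is removed
 * (10 > 2 * 3 + 3), while a state with 3 misses and no hits is kept
 * (3 > 0 * 3 + 3 is false).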
7635 */ 7636 if (add_new_state) 7637 sl->miss_cnt++; 7638 /* heuristic to determine whether this state is beneficial 7639 * to keep checking from state equivalence point of view. 7640 * Higher numbers increase max_states_per_insn and verification time, 7641 * but do not meaningfully decrease insn_processed. 7642 */ 7643 if (sl->miss_cnt > sl->hit_cnt * 3 + 3) { 7644 /* the state is unlikely to be useful. Remove it to 7645 * speed up verification 7646 */ 7647 *pprev = sl->next; 7648 if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE) { 7649 u32 br = sl->state.branches; 7650 7651 WARN_ONCE(br, 7652 "BUG live_done but branches_to_explore %d\n", 7653 br); 7654 free_verifier_state(&sl->state, false); 7655 kfree(sl); 7656 env->peak_states--; 7657 } else { 7658 /* cannot free this state, since parentage chain may 7659 * walk it later. Add it for free_list instead to 7660 * be freed at the end of verification 7661 */ 7662 sl->next = env->free_list; 7663 env->free_list = sl; 7664 } 7665 sl = *pprev; 7666 continue; 7667 } 7668 next: 7669 pprev = &sl->next; 7670 sl = *pprev; 7671 } 7672 7673 if (env->max_states_per_insn < states_cnt) 7674 env->max_states_per_insn = states_cnt; 7675 7676 if (!env->allow_ptr_leaks && states_cnt > BPF_COMPLEXITY_LIMIT_STATES) 7677 return push_jmp_history(env, cur); 7678 7679 if (!add_new_state) 7680 return push_jmp_history(env, cur); 7681 7682 /* There were no equivalent states, remember the current one. 7683 * Technically the current state is not proven to be safe yet, 7684 * but it will either reach outer most bpf_exit (which means it's safe) 7685 * or it will be rejected. When there are no loops the verifier won't be 7686 * seeing this tuple (frame[0].callsite, frame[1].callsite, .. insn_idx) 7687 * again on the way to bpf_exit. 7688 * When looping the sl->state.branches will be > 0 and this state 7689 * will not be considered for equivalence until branches == 0. 7690 */ 7691 new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL); 7692 if (!new_sl) 7693 return -ENOMEM; 7694 env->total_states++; 7695 env->peak_states++; 7696 env->prev_jmps_processed = env->jmps_processed; 7697 env->prev_insn_processed = env->insn_processed; 7698 7699 /* add new state to the head of linked list */ 7700 new = &new_sl->state; 7701 err = copy_verifier_state(new, cur); 7702 if (err) { 7703 free_verifier_state(new, false); 7704 kfree(new_sl); 7705 return err; 7706 } 7707 new->insn_idx = insn_idx; 7708 WARN_ONCE(new->branches != 1, 7709 "BUG is_state_visited:branches_to_explore=%d insn %d\n", new->branches, insn_idx); 7710 7711 cur->parent = new; 7712 cur->first_insn_idx = insn_idx; 7713 clear_jmp_history(cur); 7714 new_sl->next = *explored_state(env, insn_idx); 7715 *explored_state(env, insn_idx) = new_sl; 7716 /* connect new state to parentage chain. Current frame needs all 7717 * registers connected. Only r6 - r9 of the callers are alive (pushed 7718 * to the stack implicitly by JITs) so in callers' frames connect just 7719 * r6 - r9 as an optimization. Callers will have r1 - r5 connected to 7720 * the state of the call instruction (with WRITTEN set), and r0 comes 7721 * from callee with its full parentage chain, anyway. 7722 */ 7723 /* clear write marks in current state: the writes we did are not writes 7724 * our child did, so they don't screen off its reads from us. 7725 * (There are no read marks in current state, because reads always mark 7726 * their parent and current state never has children yet. Only 7727 * explored_states can get read marks.) 
7728 */ 7729 for (j = 0; j <= cur->curframe; j++) { 7730 for (i = j < cur->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) 7731 cur->frame[j]->regs[i].parent = &new->frame[j]->regs[i]; 7732 for (i = 0; i < BPF_REG_FP; i++) 7733 cur->frame[j]->regs[i].live = REG_LIVE_NONE; 7734 } 7735 7736 /* all stack frames are accessible from callee, clear them all */ 7737 for (j = 0; j <= cur->curframe; j++) { 7738 struct bpf_func_state *frame = cur->frame[j]; 7739 struct bpf_func_state *newframe = new->frame[j]; 7740 7741 for (i = 0; i < frame->allocated_stack / BPF_REG_SIZE; i++) { 7742 frame->stack[i].spilled_ptr.live = REG_LIVE_NONE; 7743 frame->stack[i].spilled_ptr.parent = 7744 &newframe->stack[i].spilled_ptr; 7745 } 7746 } 7747 return 0; 7748 } 7749 7750 /* Return true if it's OK to have the same insn return a different type. */ 7751 static bool reg_type_mismatch_ok(enum bpf_reg_type type) 7752 { 7753 switch (type) { 7754 case PTR_TO_CTX: 7755 case PTR_TO_SOCKET: 7756 case PTR_TO_SOCKET_OR_NULL: 7757 case PTR_TO_SOCK_COMMON: 7758 case PTR_TO_SOCK_COMMON_OR_NULL: 7759 case PTR_TO_TCP_SOCK: 7760 case PTR_TO_TCP_SOCK_OR_NULL: 7761 case PTR_TO_XDP_SOCK: 7762 case PTR_TO_BTF_ID: 7763 return false; 7764 default: 7765 return true; 7766 } 7767 } 7768 7769 /* If an instruction was previously used with particular pointer types, then we 7770 * need to be careful to avoid cases such as the below, where it may be ok 7771 * for one branch accessing the pointer, but not ok for the other branch: 7772 * 7773 * R1 = sock_ptr 7774 * goto X; 7775 * ... 7776 * R1 = some_other_valid_ptr; 7777 * goto X; 7778 * ... 7779 * R2 = *(u32 *)(R1 + 0); 7780 */ 7781 static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev) 7782 { 7783 return src != prev && (!reg_type_mismatch_ok(src) || 7784 !reg_type_mismatch_ok(prev)); 7785 } 7786 7787 static int do_check(struct bpf_verifier_env *env) 7788 { 7789 struct bpf_verifier_state *state = env->cur_state; 7790 struct bpf_insn *insns = env->prog->insnsi; 7791 struct bpf_reg_state *regs; 7792 int insn_cnt = env->prog->len; 7793 bool do_print_state = false; 7794 int prev_insn_idx = -1; 7795 7796 for (;;) { 7797 struct bpf_insn *insn; 7798 u8 class; 7799 int err; 7800 7801 env->prev_insn_idx = prev_insn_idx; 7802 if (env->insn_idx >= insn_cnt) { 7803 verbose(env, "invalid insn idx %d insn_cnt %d\n", 7804 env->insn_idx, insn_cnt); 7805 return -EFAULT; 7806 } 7807 7808 insn = &insns[env->insn_idx]; 7809 class = BPF_CLASS(insn->code); 7810 7811 if (++env->insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) { 7812 verbose(env, 7813 "BPF program is too large. Processed %d insn\n", 7814 env->insn_processed); 7815 return -E2BIG; 7816 } 7817 7818 err = is_state_visited(env, env->insn_idx); 7819 if (err < 0) 7820 return err; 7821 if (err == 1) { 7822 /* found equivalent state, can prune the search */ 7823 if (env->log.level & BPF_LOG_LEVEL) { 7824 if (do_print_state) 7825 verbose(env, "\nfrom %d to %d%s: safe\n", 7826 env->prev_insn_idx, env->insn_idx, 7827 env->cur_state->speculative ? 
7828 " (speculative execution)" : ""); 7829 else 7830 verbose(env, "%d: safe\n", env->insn_idx); 7831 } 7832 goto process_bpf_exit; 7833 } 7834 7835 if (signal_pending(current)) 7836 return -EAGAIN; 7837 7838 if (need_resched()) 7839 cond_resched(); 7840 7841 if (env->log.level & BPF_LOG_LEVEL2 || 7842 (env->log.level & BPF_LOG_LEVEL && do_print_state)) { 7843 if (env->log.level & BPF_LOG_LEVEL2) 7844 verbose(env, "%d:", env->insn_idx); 7845 else 7846 verbose(env, "\nfrom %d to %d%s:", 7847 env->prev_insn_idx, env->insn_idx, 7848 env->cur_state->speculative ? 7849 " (speculative execution)" : ""); 7850 print_verifier_state(env, state->frame[state->curframe]); 7851 do_print_state = false; 7852 } 7853 7854 if (env->log.level & BPF_LOG_LEVEL) { 7855 const struct bpf_insn_cbs cbs = { 7856 .cb_print = verbose, 7857 .private_data = env, 7858 }; 7859 7860 verbose_linfo(env, env->insn_idx, "; "); 7861 verbose(env, "%d: ", env->insn_idx); 7862 print_bpf_insn(&cbs, insn, env->allow_ptr_leaks); 7863 } 7864 7865 if (bpf_prog_is_dev_bound(env->prog->aux)) { 7866 err = bpf_prog_offload_verify_insn(env, env->insn_idx, 7867 env->prev_insn_idx); 7868 if (err) 7869 return err; 7870 } 7871 7872 regs = cur_regs(env); 7873 env->insn_aux_data[env->insn_idx].seen = env->pass_cnt; 7874 prev_insn_idx = env->insn_idx; 7875 7876 if (class == BPF_ALU || class == BPF_ALU64) { 7877 err = check_alu_op(env, insn); 7878 if (err) 7879 return err; 7880 7881 } else if (class == BPF_LDX) { 7882 enum bpf_reg_type *prev_src_type, src_reg_type; 7883 7884 /* check for reserved fields is already done */ 7885 7886 /* check src operand */ 7887 err = check_reg_arg(env, insn->src_reg, SRC_OP); 7888 if (err) 7889 return err; 7890 7891 err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK); 7892 if (err) 7893 return err; 7894 7895 src_reg_type = regs[insn->src_reg].type; 7896 7897 /* check that memory (src_reg + off) is readable, 7898 * the state of dst_reg will be updated by this func 7899 */ 7900 err = check_mem_access(env, env->insn_idx, insn->src_reg, 7901 insn->off, BPF_SIZE(insn->code), 7902 BPF_READ, insn->dst_reg, false); 7903 if (err) 7904 return err; 7905 7906 prev_src_type = &env->insn_aux_data[env->insn_idx].ptr_type; 7907 7908 if (*prev_src_type == NOT_INIT) { 7909 /* saw a valid insn 7910 * dst_reg = *(u32 *)(src_reg + off) 7911 * save type to validate intersecting paths 7912 */ 7913 *prev_src_type = src_reg_type; 7914 7915 } else if (reg_type_mismatch(src_reg_type, *prev_src_type)) { 7916 /* ABuser program is trying to use the same insn 7917 * dst_reg = *(u32*) (src_reg + off) 7918 * with different pointer types: 7919 * src_reg == ctx in one branch and 7920 * src_reg == stack|map in some other branch. 7921 * Reject it. 
7922 */ 7923 verbose(env, "same insn cannot be used with different pointers\n"); 7924 return -EINVAL; 7925 } 7926 7927 } else if (class == BPF_STX) { 7928 enum bpf_reg_type *prev_dst_type, dst_reg_type; 7929 7930 if (BPF_MODE(insn->code) == BPF_XADD) { 7931 err = check_xadd(env, env->insn_idx, insn); 7932 if (err) 7933 return err; 7934 env->insn_idx++; 7935 continue; 7936 } 7937 7938 /* check src1 operand */ 7939 err = check_reg_arg(env, insn->src_reg, SRC_OP); 7940 if (err) 7941 return err; 7942 /* check src2 operand */ 7943 err = check_reg_arg(env, insn->dst_reg, SRC_OP); 7944 if (err) 7945 return err; 7946 7947 dst_reg_type = regs[insn->dst_reg].type; 7948 7949 /* check that memory (dst_reg + off) is writeable */ 7950 err = check_mem_access(env, env->insn_idx, insn->dst_reg, 7951 insn->off, BPF_SIZE(insn->code), 7952 BPF_WRITE, insn->src_reg, false); 7953 if (err) 7954 return err; 7955 7956 prev_dst_type = &env->insn_aux_data[env->insn_idx].ptr_type; 7957 7958 if (*prev_dst_type == NOT_INIT) { 7959 *prev_dst_type = dst_reg_type; 7960 } else if (reg_type_mismatch(dst_reg_type, *prev_dst_type)) { 7961 verbose(env, "same insn cannot be used with different pointers\n"); 7962 return -EINVAL; 7963 } 7964 7965 } else if (class == BPF_ST) { 7966 if (BPF_MODE(insn->code) != BPF_MEM || 7967 insn->src_reg != BPF_REG_0) { 7968 verbose(env, "BPF_ST uses reserved fields\n"); 7969 return -EINVAL; 7970 } 7971 /* check src operand */ 7972 err = check_reg_arg(env, insn->dst_reg, SRC_OP); 7973 if (err) 7974 return err; 7975 7976 if (is_ctx_reg(env, insn->dst_reg)) { 7977 verbose(env, "BPF_ST stores into R%d %s is not allowed\n", 7978 insn->dst_reg, 7979 reg_type_str[reg_state(env, insn->dst_reg)->type]); 7980 return -EACCES; 7981 } 7982 7983 /* check that memory (dst_reg + off) is writeable */ 7984 err = check_mem_access(env, env->insn_idx, insn->dst_reg, 7985 insn->off, BPF_SIZE(insn->code), 7986 BPF_WRITE, -1, false); 7987 if (err) 7988 return err; 7989 7990 } else if (class == BPF_JMP || class == BPF_JMP32) { 7991 u8 opcode = BPF_OP(insn->code); 7992 7993 env->jmps_processed++; 7994 if (opcode == BPF_CALL) { 7995 if (BPF_SRC(insn->code) != BPF_K || 7996 insn->off != 0 || 7997 (insn->src_reg != BPF_REG_0 && 7998 insn->src_reg != BPF_PSEUDO_CALL) || 7999 insn->dst_reg != BPF_REG_0 || 8000 class == BPF_JMP32) { 8001 verbose(env, "BPF_CALL uses reserved fields\n"); 8002 return -EINVAL; 8003 } 8004 8005 if (env->cur_state->active_spin_lock && 8006 (insn->src_reg == BPF_PSEUDO_CALL || 8007 insn->imm != BPF_FUNC_spin_unlock)) { 8008 verbose(env, "function calls are not allowed while holding a lock\n"); 8009 return -EINVAL; 8010 } 8011 if (insn->src_reg == BPF_PSEUDO_CALL) 8012 err = check_func_call(env, insn, &env->insn_idx); 8013 else 8014 err = check_helper_call(env, insn->imm, env->insn_idx); 8015 if (err) 8016 return err; 8017 8018 } else if (opcode == BPF_JA) { 8019 if (BPF_SRC(insn->code) != BPF_K || 8020 insn->imm != 0 || 8021 insn->src_reg != BPF_REG_0 || 8022 insn->dst_reg != BPF_REG_0 || 8023 class == BPF_JMP32) { 8024 verbose(env, "BPF_JA uses reserved fields\n"); 8025 return -EINVAL; 8026 } 8027 8028 env->insn_idx += insn->off + 1; 8029 continue; 8030 8031 } else if (opcode == BPF_EXIT) { 8032 if (BPF_SRC(insn->code) != BPF_K || 8033 insn->imm != 0 || 8034 insn->src_reg != BPF_REG_0 || 8035 insn->dst_reg != BPF_REG_0 || 8036 class == BPF_JMP32) { 8037 verbose(env, "BPF_EXIT uses reserved fields\n"); 8038 return -EINVAL; 8039 } 8040 8041 if (env->cur_state->active_spin_lock) { 8042 verbose(env, 
"bpf_spin_unlock is missing\n"); 8043 return -EINVAL; 8044 } 8045 8046 if (state->curframe) { 8047 /* exit from nested function */ 8048 err = prepare_func_exit(env, &env->insn_idx); 8049 if (err) 8050 return err; 8051 do_print_state = true; 8052 continue; 8053 } 8054 8055 err = check_reference_leak(env); 8056 if (err) 8057 return err; 8058 8059 err = check_return_code(env); 8060 if (err) 8061 return err; 8062 process_bpf_exit: 8063 update_branch_counts(env, env->cur_state); 8064 err = pop_stack(env, &prev_insn_idx, 8065 &env->insn_idx); 8066 if (err < 0) { 8067 if (err != -ENOENT) 8068 return err; 8069 break; 8070 } else { 8071 do_print_state = true; 8072 continue; 8073 } 8074 } else { 8075 err = check_cond_jmp_op(env, insn, &env->insn_idx); 8076 if (err) 8077 return err; 8078 } 8079 } else if (class == BPF_LD) { 8080 u8 mode = BPF_MODE(insn->code); 8081 8082 if (mode == BPF_ABS || mode == BPF_IND) { 8083 err = check_ld_abs(env, insn); 8084 if (err) 8085 return err; 8086 8087 } else if (mode == BPF_IMM) { 8088 err = check_ld_imm(env, insn); 8089 if (err) 8090 return err; 8091 8092 env->insn_idx++; 8093 env->insn_aux_data[env->insn_idx].seen = env->pass_cnt; 8094 } else { 8095 verbose(env, "invalid BPF_LD mode\n"); 8096 return -EINVAL; 8097 } 8098 } else { 8099 verbose(env, "unknown insn class %d\n", class); 8100 return -EINVAL; 8101 } 8102 8103 env->insn_idx++; 8104 } 8105 8106 return 0; 8107 } 8108 8109 static int check_map_prealloc(struct bpf_map *map) 8110 { 8111 return (map->map_type != BPF_MAP_TYPE_HASH && 8112 map->map_type != BPF_MAP_TYPE_PERCPU_HASH && 8113 map->map_type != BPF_MAP_TYPE_HASH_OF_MAPS) || 8114 !(map->map_flags & BPF_F_NO_PREALLOC); 8115 } 8116 8117 static bool is_tracing_prog_type(enum bpf_prog_type type) 8118 { 8119 switch (type) { 8120 case BPF_PROG_TYPE_KPROBE: 8121 case BPF_PROG_TYPE_TRACEPOINT: 8122 case BPF_PROG_TYPE_PERF_EVENT: 8123 case BPF_PROG_TYPE_RAW_TRACEPOINT: 8124 return true; 8125 default: 8126 return false; 8127 } 8128 } 8129 8130 static int check_map_prog_compatibility(struct bpf_verifier_env *env, 8131 struct bpf_map *map, 8132 struct bpf_prog *prog) 8133 8134 { 8135 /* Make sure that BPF_PROG_TYPE_PERF_EVENT programs only use 8136 * preallocated hash maps, since doing memory allocation 8137 * in overflow_handler can crash depending on where nmi got 8138 * triggered. 
8139 */ 8140 if (prog->type == BPF_PROG_TYPE_PERF_EVENT) { 8141 if (!check_map_prealloc(map)) { 8142 verbose(env, "perf_event programs can only use preallocated hash map\n"); 8143 return -EINVAL; 8144 } 8145 if (map->inner_map_meta && 8146 !check_map_prealloc(map->inner_map_meta)) { 8147 verbose(env, "perf_event programs can only use preallocated inner hash map\n"); 8148 return -EINVAL; 8149 } 8150 } 8151 8152 if ((is_tracing_prog_type(prog->type) || 8153 prog->type == BPF_PROG_TYPE_SOCKET_FILTER) && 8154 map_value_has_spin_lock(map)) { 8155 verbose(env, "tracing progs cannot use bpf_spin_lock yet\n"); 8156 return -EINVAL; 8157 } 8158 8159 if ((bpf_prog_is_dev_bound(prog->aux) || bpf_map_is_dev_bound(map)) && 8160 !bpf_offload_prog_map_match(prog, map)) { 8161 verbose(env, "offload device mismatch between prog and map\n"); 8162 return -EINVAL; 8163 } 8164 8165 if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) { 8166 verbose(env, "bpf_struct_ops map cannot be used in prog\n"); 8167 return -EINVAL; 8168 } 8169 8170 return 0; 8171 } 8172 8173 static bool bpf_map_is_cgroup_storage(struct bpf_map *map) 8174 { 8175 return (map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE || 8176 map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE); 8177 } 8178 8179 /* look for pseudo eBPF instructions that access map FDs and 8180 * replace them with actual map pointers 8181 */ 8182 static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env) 8183 { 8184 struct bpf_insn *insn = env->prog->insnsi; 8185 int insn_cnt = env->prog->len; 8186 int i, j, err; 8187 8188 err = bpf_prog_calc_tag(env->prog); 8189 if (err) 8190 return err; 8191 8192 for (i = 0; i < insn_cnt; i++, insn++) { 8193 if (BPF_CLASS(insn->code) == BPF_LDX && 8194 (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0)) { 8195 verbose(env, "BPF_LDX uses reserved fields\n"); 8196 return -EINVAL; 8197 } 8198 8199 if (BPF_CLASS(insn->code) == BPF_STX && 8200 ((BPF_MODE(insn->code) != BPF_MEM && 8201 BPF_MODE(insn->code) != BPF_XADD) || insn->imm != 0)) { 8202 verbose(env, "BPF_STX uses reserved fields\n"); 8203 return -EINVAL; 8204 } 8205 8206 if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) { 8207 struct bpf_insn_aux_data *aux; 8208 struct bpf_map *map; 8209 struct fd f; 8210 u64 addr; 8211 8212 if (i == insn_cnt - 1 || insn[1].code != 0 || 8213 insn[1].dst_reg != 0 || insn[1].src_reg != 0 || 8214 insn[1].off != 0) { 8215 verbose(env, "invalid bpf_ld_imm64 insn\n"); 8216 return -EINVAL; 8217 } 8218 8219 if (insn[0].src_reg == 0) 8220 /* valid generic load 64-bit imm */ 8221 goto next_insn; 8222 8223 /* In final convert_pseudo_ld_imm64() step, this is 8224 * converted into regular 64-bit imm load insn. 
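 *
 * Illustrative example: a loader emitting BPF_LD_MAP_FD(BPF_REG_1, 5) (the
 * fd value is arbitrary) produces a two-insn ld_imm64 with src_reg ==
 * BPF_PSEUDO_MAP_FD and insn[0].imm holding the map fd. Below, the fd is
 * resolved to a struct bpf_map * and the pointer is written back split
 * across the pair: insn[0].imm gets the lower 32 bits, insn[1].imm the
 * upper 32 bits. convert_pseudo_ld_imm64() later clears src_reg so the
 * result looks like a plain 64-bit immediate load.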
8225 */ 8226 if ((insn[0].src_reg != BPF_PSEUDO_MAP_FD && 8227 insn[0].src_reg != BPF_PSEUDO_MAP_VALUE) || 8228 (insn[0].src_reg == BPF_PSEUDO_MAP_FD && 8229 insn[1].imm != 0)) { 8230 verbose(env, 8231 "unrecognized bpf_ld_imm64 insn\n"); 8232 return -EINVAL; 8233 } 8234 8235 f = fdget(insn[0].imm); 8236 map = __bpf_map_get(f); 8237 if (IS_ERR(map)) { 8238 verbose(env, "fd %d is not pointing to valid bpf_map\n", 8239 insn[0].imm); 8240 return PTR_ERR(map); 8241 } 8242 8243 err = check_map_prog_compatibility(env, map, env->prog); 8244 if (err) { 8245 fdput(f); 8246 return err; 8247 } 8248 8249 aux = &env->insn_aux_data[i]; 8250 if (insn->src_reg == BPF_PSEUDO_MAP_FD) { 8251 addr = (unsigned long)map; 8252 } else { 8253 u32 off = insn[1].imm; 8254 8255 if (off >= BPF_MAX_VAR_OFF) { 8256 verbose(env, "direct value offset of %u is not allowed\n", off); 8257 fdput(f); 8258 return -EINVAL; 8259 } 8260 8261 if (!map->ops->map_direct_value_addr) { 8262 verbose(env, "no direct value access support for this map type\n"); 8263 fdput(f); 8264 return -EINVAL; 8265 } 8266 8267 err = map->ops->map_direct_value_addr(map, &addr, off); 8268 if (err) { 8269 verbose(env, "invalid access to map value pointer, value_size=%u off=%u\n", 8270 map->value_size, off); 8271 fdput(f); 8272 return err; 8273 } 8274 8275 aux->map_off = off; 8276 addr += off; 8277 } 8278 8279 insn[0].imm = (u32)addr; 8280 insn[1].imm = addr >> 32; 8281 8282 /* check whether we recorded this map already */ 8283 for (j = 0; j < env->used_map_cnt; j++) { 8284 if (env->used_maps[j] == map) { 8285 aux->map_index = j; 8286 fdput(f); 8287 goto next_insn; 8288 } 8289 } 8290 8291 if (env->used_map_cnt >= MAX_USED_MAPS) { 8292 fdput(f); 8293 return -E2BIG; 8294 } 8295 8296 /* hold the map. If the program is rejected by verifier, 8297 * the map will be released by release_maps() or it 8298 * will be used by the valid program until it's unloaded 8299 * and all maps are released in free_used_maps() 8300 */ 8301 bpf_map_inc(map); 8302 8303 aux->map_index = env->used_map_cnt; 8304 env->used_maps[env->used_map_cnt++] = map; 8305 8306 if (bpf_map_is_cgroup_storage(map) && 8307 bpf_cgroup_storage_assign(env->prog->aux, map)) { 8308 verbose(env, "only one cgroup storage of each type is allowed\n"); 8309 fdput(f); 8310 return -EBUSY; 8311 } 8312 8313 fdput(f); 8314 next_insn: 8315 insn++; 8316 i++; 8317 continue; 8318 } 8319 8320 /* Basic sanity check before we invest more work here. */ 8321 if (!bpf_opcode_in_insntable(insn->code)) { 8322 verbose(env, "unknown opcode %02x\n", insn->code); 8323 return -EINVAL; 8324 } 8325 } 8326 8327 /* now all pseudo BPF_LD_IMM64 instructions load valid 8328 * 'struct bpf_map *' into a register instead of user map_fd. 8329 * These pointers will be used later by verifier to validate map access. 8330 */ 8331 return 0; 8332 } 8333 8334 /* drop refcnt of maps used by the rejected program */ 8335 static void release_maps(struct bpf_verifier_env *env) 8336 { 8337 __bpf_free_used_maps(env->prog->aux, env->used_maps, 8338 env->used_map_cnt); 8339 } 8340 8341 /* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */ 8342 static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env) 8343 { 8344 struct bpf_insn *insn = env->prog->insnsi; 8345 int insn_cnt = env->prog->len; 8346 int i; 8347 8348 for (i = 0; i < insn_cnt; i++, insn++) 8349 if (insn->code == (BPF_LD | BPF_IMM | BPF_DW)) 8350 insn->src_reg = 0; 8351 } 8352 8353 /* single env->prog->insni[off] instruction was replaced with the range 8354 * insni[off, off + cnt). 
Adjust corresponding insn_aux_data by copying 8355 * [0, off) and [off, end) to new locations, so the patched range stays zero 8356 */ 8357 static int adjust_insn_aux_data(struct bpf_verifier_env *env, 8358 struct bpf_prog *new_prog, u32 off, u32 cnt) 8359 { 8360 struct bpf_insn_aux_data *new_data, *old_data = env->insn_aux_data; 8361 struct bpf_insn *insn = new_prog->insnsi; 8362 u32 prog_len; 8363 int i; 8364 8365 /* aux info at OFF always needs adjustment, no matter fast path 8366 * (cnt == 1) is taken or not. There is no guarantee INSN at OFF is the 8367 * original insn at old prog. 8368 */ 8369 old_data[off].zext_dst = insn_has_def32(env, insn + off + cnt - 1); 8370 8371 if (cnt == 1) 8372 return 0; 8373 prog_len = new_prog->len; 8374 new_data = vzalloc(array_size(prog_len, 8375 sizeof(struct bpf_insn_aux_data))); 8376 if (!new_data) 8377 return -ENOMEM; 8378 memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off); 8379 memcpy(new_data + off + cnt - 1, old_data + off, 8380 sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1)); 8381 for (i = off; i < off + cnt - 1; i++) { 8382 new_data[i].seen = env->pass_cnt; 8383 new_data[i].zext_dst = insn_has_def32(env, insn + i); 8384 } 8385 env->insn_aux_data = new_data; 8386 vfree(old_data); 8387 return 0; 8388 } 8389 8390 static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len) 8391 { 8392 int i; 8393 8394 if (len == 1) 8395 return; 8396 /* NOTE: fake 'exit' subprog should be updated as well. */ 8397 for (i = 0; i <= env->subprog_cnt; i++) { 8398 if (env->subprog_info[i].start <= off) 8399 continue; 8400 env->subprog_info[i].start += len - 1; 8401 } 8402 } 8403 8404 static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off, 8405 const struct bpf_insn *patch, u32 len) 8406 { 8407 struct bpf_prog *new_prog; 8408 8409 new_prog = bpf_patch_insn_single(env->prog, off, patch, len); 8410 if (IS_ERR(new_prog)) { 8411 if (PTR_ERR(new_prog) == -ERANGE) 8412 verbose(env, 8413 "insn %d cannot be patched due to 16-bit range\n", 8414 env->insn_aux_data[off].orig_idx); 8415 return NULL; 8416 } 8417 if (adjust_insn_aux_data(env, new_prog, off, len)) 8418 return NULL; 8419 adjust_subprog_starts(env, off, len); 8420 return new_prog; 8421 } 8422 8423 static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env, 8424 u32 off, u32 cnt) 8425 { 8426 int i, j; 8427 8428 /* find first prog starting at or after off (first to remove) */ 8429 for (i = 0; i < env->subprog_cnt; i++) 8430 if (env->subprog_info[i].start >= off) 8431 break; 8432 /* find first prog starting at or after off + cnt (first to stay) */ 8433 for (j = i; j < env->subprog_cnt; j++) 8434 if (env->subprog_info[j].start >= off + cnt) 8435 break; 8436 /* if j doesn't start exactly at off + cnt, we are just removing 8437 * the front of previous prog 8438 */ 8439 if (env->subprog_info[j].start != off + cnt) 8440 j--; 8441 8442 if (j > i) { 8443 struct bpf_prog_aux *aux = env->prog->aux; 8444 int move; 8445 8446 /* move fake 'exit' subprog as well */ 8447 move = env->subprog_cnt + 1 - j; 8448 8449 memmove(env->subprog_info + i, 8450 env->subprog_info + j, 8451 sizeof(*env->subprog_info) * move); 8452 env->subprog_cnt -= j - i; 8453 8454 /* remove func_info */ 8455 if (aux->func_info) { 8456 move = aux->func_info_cnt - j; 8457 8458 memmove(aux->func_info + i, 8459 aux->func_info + j, 8460 sizeof(*aux->func_info) * move); 8461 aux->func_info_cnt -= j - i; 8462 /* func_info->insn_off is set after all code rewrites, 8463 * in 
adjust_btf_func() - no need to adjust 8464 */ 8465 } 8466 } else { 8467 /* convert i from "first prog to remove" to "first to adjust" */ 8468 if (env->subprog_info[i].start == off) 8469 i++; 8470 } 8471 8472 /* update fake 'exit' subprog as well */ 8473 for (; i <= env->subprog_cnt; i++) 8474 env->subprog_info[i].start -= cnt; 8475 8476 return 0; 8477 } 8478 8479 static int bpf_adj_linfo_after_remove(struct bpf_verifier_env *env, u32 off, 8480 u32 cnt) 8481 { 8482 struct bpf_prog *prog = env->prog; 8483 u32 i, l_off, l_cnt, nr_linfo; 8484 struct bpf_line_info *linfo; 8485 8486 nr_linfo = prog->aux->nr_linfo; 8487 if (!nr_linfo) 8488 return 0; 8489 8490 linfo = prog->aux->linfo; 8491 8492 /* find first line info to remove, count lines to be removed */ 8493 for (i = 0; i < nr_linfo; i++) 8494 if (linfo[i].insn_off >= off) 8495 break; 8496 8497 l_off = i; 8498 l_cnt = 0; 8499 for (; i < nr_linfo; i++) 8500 if (linfo[i].insn_off < off + cnt) 8501 l_cnt++; 8502 else 8503 break; 8504 8505 /* First live insn doesn't match first live linfo, it needs to "inherit" 8506 * last removed linfo. prog is already modified, so prog->len == off 8507 * means no live instructions after (tail of the program was removed). 8508 */ 8509 if (prog->len != off && l_cnt && 8510 (i == nr_linfo || linfo[i].insn_off != off + cnt)) { 8511 l_cnt--; 8512 linfo[--i].insn_off = off + cnt; 8513 } 8514 8515 /* remove the line info which refer to the removed instructions */ 8516 if (l_cnt) { 8517 memmove(linfo + l_off, linfo + i, 8518 sizeof(*linfo) * (nr_linfo - i)); 8519 8520 prog->aux->nr_linfo -= l_cnt; 8521 nr_linfo = prog->aux->nr_linfo; 8522 } 8523 8524 /* pull all linfo[i].insn_off >= off + cnt in by cnt */ 8525 for (i = l_off; i < nr_linfo; i++) 8526 linfo[i].insn_off -= cnt; 8527 8528 /* fix up all subprogs (incl. 'exit') which start >= off */ 8529 for (i = 0; i <= env->subprog_cnt; i++) 8530 if (env->subprog_info[i].linfo_idx > l_off) { 8531 /* program may have started in the removed region but 8532 * may not be fully removed 8533 */ 8534 if (env->subprog_info[i].linfo_idx >= l_off + l_cnt) 8535 env->subprog_info[i].linfo_idx -= l_cnt; 8536 else 8537 env->subprog_info[i].linfo_idx = l_off; 8538 } 8539 8540 return 0; 8541 } 8542 8543 static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt) 8544 { 8545 struct bpf_insn_aux_data *aux_data = env->insn_aux_data; 8546 unsigned int orig_prog_len = env->prog->len; 8547 int err; 8548 8549 if (bpf_prog_is_dev_bound(env->prog->aux)) 8550 bpf_prog_offload_remove_insns(env, off, cnt); 8551 8552 err = bpf_remove_insns(env->prog, off, cnt); 8553 if (err) 8554 return err; 8555 8556 err = adjust_subprog_starts_after_remove(env, off, cnt); 8557 if (err) 8558 return err; 8559 8560 err = bpf_adj_linfo_after_remove(env, off, cnt); 8561 if (err) 8562 return err; 8563 8564 memmove(aux_data + off, aux_data + off + cnt, 8565 sizeof(*aux_data) * (orig_prog_len - off - cnt)); 8566 8567 return 0; 8568 } 8569 8570 /* The verifier does more data flow analysis than llvm and will not 8571 * explore branches that are dead at run time. Malicious programs can 8572 * have dead code too. Therefore replace all dead at-run-time code 8573 * with 'ja -1'. 8574 * 8575 * Just nops are not optimal, e.g. if they would sit at the end of the 8576 * program and through another bug we would manage to jump there, then 8577 * we'd execute beyond program memory otherwise. Returning exception 8578 * code also wouldn't work since we can have subprogs where the dead 8579 * code could be located. 
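 *
 * The 'ja -1' trap below is an unconditional jump with offset -1, i.e. an
 * insn that branches back to itself, so if a stray jump ever lands on
 * sanitized code, execution stays pinned there instead of running past the
 * end of the program.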
8580 */ 8581 static void sanitize_dead_code(struct bpf_verifier_env *env) 8582 { 8583 struct bpf_insn_aux_data *aux_data = env->insn_aux_data; 8584 struct bpf_insn trap = BPF_JMP_IMM(BPF_JA, 0, 0, -1); 8585 struct bpf_insn *insn = env->prog->insnsi; 8586 const int insn_cnt = env->prog->len; 8587 int i; 8588 8589 for (i = 0; i < insn_cnt; i++) { 8590 if (aux_data[i].seen) 8591 continue; 8592 memcpy(insn + i, &trap, sizeof(trap)); 8593 } 8594 } 8595 8596 static bool insn_is_cond_jump(u8 code) 8597 { 8598 u8 op; 8599 8600 if (BPF_CLASS(code) == BPF_JMP32) 8601 return true; 8602 8603 if (BPF_CLASS(code) != BPF_JMP) 8604 return false; 8605 8606 op = BPF_OP(code); 8607 return op != BPF_JA && op != BPF_EXIT && op != BPF_CALL; 8608 } 8609 8610 static void opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env) 8611 { 8612 struct bpf_insn_aux_data *aux_data = env->insn_aux_data; 8613 struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0); 8614 struct bpf_insn *insn = env->prog->insnsi; 8615 const int insn_cnt = env->prog->len; 8616 int i; 8617 8618 for (i = 0; i < insn_cnt; i++, insn++) { 8619 if (!insn_is_cond_jump(insn->code)) 8620 continue; 8621 8622 if (!aux_data[i + 1].seen) 8623 ja.off = insn->off; 8624 else if (!aux_data[i + 1 + insn->off].seen) 8625 ja.off = 0; 8626 else 8627 continue; 8628 8629 if (bpf_prog_is_dev_bound(env->prog->aux)) 8630 bpf_prog_offload_replace_insn(env, i, &ja); 8631 8632 memcpy(insn, &ja, sizeof(ja)); 8633 } 8634 } 8635 8636 static int opt_remove_dead_code(struct bpf_verifier_env *env) 8637 { 8638 struct bpf_insn_aux_data *aux_data = env->insn_aux_data; 8639 int insn_cnt = env->prog->len; 8640 int i, err; 8641 8642 for (i = 0; i < insn_cnt; i++) { 8643 int j; 8644 8645 j = 0; 8646 while (i + j < insn_cnt && !aux_data[i + j].seen) 8647 j++; 8648 if (!j) 8649 continue; 8650 8651 err = verifier_remove_insns(env, i, j); 8652 if (err) 8653 return err; 8654 insn_cnt = env->prog->len; 8655 } 8656 8657 return 0; 8658 } 8659 8660 static int opt_remove_nops(struct bpf_verifier_env *env) 8661 { 8662 const struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0); 8663 struct bpf_insn *insn = env->prog->insnsi; 8664 int insn_cnt = env->prog->len; 8665 int i, err; 8666 8667 for (i = 0; i < insn_cnt; i++) { 8668 if (memcmp(&insn[i], &ja, sizeof(ja))) 8669 continue; 8670 8671 err = verifier_remove_insns(env, i, 1); 8672 if (err) 8673 return err; 8674 insn_cnt--; 8675 i--; 8676 } 8677 8678 return 0; 8679 } 8680 8681 static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env, 8682 const union bpf_attr *attr) 8683 { 8684 struct bpf_insn *patch, zext_patch[2], rnd_hi32_patch[4]; 8685 struct bpf_insn_aux_data *aux = env->insn_aux_data; 8686 int i, patch_len, delta = 0, len = env->prog->len; 8687 struct bpf_insn *insns = env->prog->insnsi; 8688 struct bpf_prog *new_prog; 8689 bool rnd_hi32; 8690 8691 rnd_hi32 = attr->prog_flags & BPF_F_TEST_RND_HI32; 8692 zext_patch[1] = BPF_ZEXT_REG(0); 8693 rnd_hi32_patch[1] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, 0); 8694 rnd_hi32_patch[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, 32); 8695 rnd_hi32_patch[3] = BPF_ALU64_REG(BPF_OR, 0, BPF_REG_AX); 8696 for (i = 0; i < len; i++) { 8697 int adj_idx = i + delta; 8698 struct bpf_insn insn; 8699 8700 insn = insns[adj_idx]; 8701 if (!aux[adj_idx].zext_dst) { 8702 u8 code, class; 8703 u32 imm_rnd; 8704 8705 if (!rnd_hi32) 8706 continue; 8707 8708 code = insn.code; 8709 class = BPF_CLASS(code); 8710 if (insn_no_def(&insn)) 8711 continue; 8712 8713 /* NOTE: arg "reg" (the fourth one) is only used for 8714 * 
BPF_STX which has been ruled out in above 8715 * check, it is safe to pass NULL here. 8716 */ 8717 if (is_reg64(env, &insn, insn.dst_reg, NULL, DST_OP)) { 8718 if (class == BPF_LD && 8719 BPF_MODE(code) == BPF_IMM) 8720 i++; 8721 continue; 8722 } 8723 8724 /* ctx load could be transformed into wider load. */ 8725 if (class == BPF_LDX && 8726 aux[adj_idx].ptr_type == PTR_TO_CTX) 8727 continue; 8728 8729 imm_rnd = get_random_int(); 8730 rnd_hi32_patch[0] = insn; 8731 rnd_hi32_patch[1].imm = imm_rnd; 8732 rnd_hi32_patch[3].dst_reg = insn.dst_reg; 8733 patch = rnd_hi32_patch; 8734 patch_len = 4; 8735 goto apply_patch_buffer; 8736 } 8737 8738 if (!bpf_jit_needs_zext()) 8739 continue; 8740 8741 zext_patch[0] = insn; 8742 zext_patch[1].dst_reg = insn.dst_reg; 8743 zext_patch[1].src_reg = insn.dst_reg; 8744 patch = zext_patch; 8745 patch_len = 2; 8746 apply_patch_buffer: 8747 new_prog = bpf_patch_insn_data(env, adj_idx, patch, patch_len); 8748 if (!new_prog) 8749 return -ENOMEM; 8750 env->prog = new_prog; 8751 insns = new_prog->insnsi; 8752 aux = env->insn_aux_data; 8753 delta += patch_len - 1; 8754 } 8755 8756 return 0; 8757 } 8758 8759 /* convert load instructions that access fields of a context type into a 8760 * sequence of instructions that access fields of the underlying structure: 8761 * struct __sk_buff -> struct sk_buff 8762 * struct bpf_sock_ops -> struct sock 8763 */ 8764 static int convert_ctx_accesses(struct bpf_verifier_env *env) 8765 { 8766 const struct bpf_verifier_ops *ops = env->ops; 8767 int i, cnt, size, ctx_field_size, delta = 0; 8768 const int insn_cnt = env->prog->len; 8769 struct bpf_insn insn_buf[16], *insn; 8770 u32 target_size, size_default, off; 8771 struct bpf_prog *new_prog; 8772 enum bpf_access_type type; 8773 bool is_narrower_load; 8774 8775 if (ops->gen_prologue || env->seen_direct_write) { 8776 if (!ops->gen_prologue) { 8777 verbose(env, "bpf verifier is misconfigured\n"); 8778 return -EINVAL; 8779 } 8780 cnt = ops->gen_prologue(insn_buf, env->seen_direct_write, 8781 env->prog); 8782 if (cnt >= ARRAY_SIZE(insn_buf)) { 8783 verbose(env, "bpf verifier is misconfigured\n"); 8784 return -EINVAL; 8785 } else if (cnt) { 8786 new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt); 8787 if (!new_prog) 8788 return -ENOMEM; 8789 8790 env->prog = new_prog; 8791 delta += cnt - 1; 8792 } 8793 } 8794 8795 if (bpf_prog_is_dev_bound(env->prog->aux)) 8796 return 0; 8797 8798 insn = env->prog->insnsi + delta; 8799 8800 for (i = 0; i < insn_cnt; i++, insn++) { 8801 bpf_convert_ctx_access_t convert_ctx_access; 8802 8803 if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) || 8804 insn->code == (BPF_LDX | BPF_MEM | BPF_H) || 8805 insn->code == (BPF_LDX | BPF_MEM | BPF_W) || 8806 insn->code == (BPF_LDX | BPF_MEM | BPF_DW)) 8807 type = BPF_READ; 8808 else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) || 8809 insn->code == (BPF_STX | BPF_MEM | BPF_H) || 8810 insn->code == (BPF_STX | BPF_MEM | BPF_W) || 8811 insn->code == (BPF_STX | BPF_MEM | BPF_DW)) 8812 type = BPF_WRITE; 8813 else 8814 continue; 8815 8816 if (type == BPF_WRITE && 8817 env->insn_aux_data[i + delta].sanitize_stack_off) { 8818 struct bpf_insn patch[] = { 8819 /* Sanitize suspicious stack slot with zero. 
8820 * There are no memory dependencies for this store, 8821 * since it's only using frame pointer and immediate 8822 * constant of zero 8823 */ 8824 BPF_ST_MEM(BPF_DW, BPF_REG_FP, 8825 env->insn_aux_data[i + delta].sanitize_stack_off, 8826 0), 8827 /* the original STX instruction will immediately 8828 * overwrite the same stack slot with appropriate value 8829 */ 8830 *insn, 8831 }; 8832 8833 cnt = ARRAY_SIZE(patch); 8834 new_prog = bpf_patch_insn_data(env, i + delta, patch, cnt); 8835 if (!new_prog) 8836 return -ENOMEM; 8837 8838 delta += cnt - 1; 8839 env->prog = new_prog; 8840 insn = new_prog->insnsi + i + delta; 8841 continue; 8842 } 8843 8844 switch (env->insn_aux_data[i + delta].ptr_type) { 8845 case PTR_TO_CTX: 8846 if (!ops->convert_ctx_access) 8847 continue; 8848 convert_ctx_access = ops->convert_ctx_access; 8849 break; 8850 case PTR_TO_SOCKET: 8851 case PTR_TO_SOCK_COMMON: 8852 convert_ctx_access = bpf_sock_convert_ctx_access; 8853 break; 8854 case PTR_TO_TCP_SOCK: 8855 convert_ctx_access = bpf_tcp_sock_convert_ctx_access; 8856 break; 8857 case PTR_TO_XDP_SOCK: 8858 convert_ctx_access = bpf_xdp_sock_convert_ctx_access; 8859 break; 8860 case PTR_TO_BTF_ID: 8861 if (type == BPF_READ) { 8862 insn->code = BPF_LDX | BPF_PROBE_MEM | 8863 BPF_SIZE((insn)->code); 8864 env->prog->aux->num_exentries++; 8865 } else if (env->prog->type != BPF_PROG_TYPE_STRUCT_OPS) { 8866 verbose(env, "Writes through BTF pointers are not allowed\n"); 8867 return -EINVAL; 8868 } 8869 continue; 8870 default: 8871 continue; 8872 } 8873 8874 ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size; 8875 size = BPF_LDST_BYTES(insn); 8876 8877 /* If the read access is a narrower load of the field, 8878 * convert to a 4/8-byte load, to minimum program type specific 8879 * convert_ctx_access changes. If conversion is successful, 8880 * we will apply proper mask to the result. 
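 *
 * Worked example (assuming a little-endian host): a 1-byte read at ctx
 * offset 'off' that falls inside a 4-byte field becomes a 4-byte read at
 * 'off & ~3'; after conversion the result is shifted right by
 * (off & 3) * 8 (the shift comes from bpf_ctx_narrow_access_offset(),
 * which accounts for endianness) and masked with (1 << 8) - 1 == 0xff,
 * so the program still observes only the byte it asked for.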
8881 */ 8882 is_narrower_load = size < ctx_field_size; 8883 size_default = bpf_ctx_off_adjust_machine(ctx_field_size); 8884 off = insn->off; 8885 if (is_narrower_load) { 8886 u8 size_code; 8887 8888 if (type == BPF_WRITE) { 8889 verbose(env, "bpf verifier narrow ctx access misconfigured\n"); 8890 return -EINVAL; 8891 } 8892 8893 size_code = BPF_H; 8894 if (ctx_field_size == 4) 8895 size_code = BPF_W; 8896 else if (ctx_field_size == 8) 8897 size_code = BPF_DW; 8898 8899 insn->off = off & ~(size_default - 1); 8900 insn->code = BPF_LDX | BPF_MEM | size_code; 8901 } 8902 8903 target_size = 0; 8904 cnt = convert_ctx_access(type, insn, insn_buf, env->prog, 8905 &target_size); 8906 if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf) || 8907 (ctx_field_size && !target_size)) { 8908 verbose(env, "bpf verifier is misconfigured\n"); 8909 return -EINVAL; 8910 } 8911 8912 if (is_narrower_load && size < target_size) { 8913 u8 shift = bpf_ctx_narrow_access_offset( 8914 off, size, size_default) * 8; 8915 if (ctx_field_size <= 4) { 8916 if (shift) 8917 insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH, 8918 insn->dst_reg, 8919 shift); 8920 insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg, 8921 (1 << size * 8) - 1); 8922 } else { 8923 if (shift) 8924 insn_buf[cnt++] = BPF_ALU64_IMM(BPF_RSH, 8925 insn->dst_reg, 8926 shift); 8927 insn_buf[cnt++] = BPF_ALU64_IMM(BPF_AND, insn->dst_reg, 8928 (1ULL << size * 8) - 1); 8929 } 8930 } 8931 8932 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); 8933 if (!new_prog) 8934 return -ENOMEM; 8935 8936 delta += cnt - 1; 8937 8938 /* keep walking new program and skip insns we just inserted */ 8939 env->prog = new_prog; 8940 insn = new_prog->insnsi + i + delta; 8941 } 8942 8943 return 0; 8944 } 8945 8946 static int jit_subprogs(struct bpf_verifier_env *env) 8947 { 8948 struct bpf_prog *prog = env->prog, **func, *tmp; 8949 int i, j, subprog_start, subprog_end = 0, len, subprog; 8950 struct bpf_insn *insn; 8951 void *old_bpf_func; 8952 int err; 8953 8954 if (env->subprog_cnt <= 1) 8955 return 0; 8956 8957 for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) { 8958 if (insn->code != (BPF_JMP | BPF_CALL) || 8959 insn->src_reg != BPF_PSEUDO_CALL) 8960 continue; 8961 /* Upon error here we cannot fall back to interpreter but 8962 * need a hard reject of the program. Thus -EFAULT is 8963 * propagated in any case. 8964 */ 8965 subprog = find_subprog(env, i + insn->imm + 1); 8966 if (subprog < 0) { 8967 WARN_ONCE(1, "verifier bug. No program starts at insn %d\n", 8968 i + insn->imm + 1); 8969 return -EFAULT; 8970 } 8971 /* temporarily remember subprog id inside insn instead of 8972 * aux_data, since next loop will split up all insns into funcs 8973 */ 8974 insn->off = subprog; 8975 /* remember original imm in case JIT fails and fallback 8976 * to interpreter will be needed 8977 */ 8978 env->insn_aux_data[i].call_imm = insn->imm; 8979 /* point imm to __bpf_call_base+1 from JITs point of view */ 8980 insn->imm = 1; 8981 } 8982 8983 err = bpf_prog_alloc_jited_linfo(prog); 8984 if (err) 8985 goto out_undo_insn; 8986 8987 err = -ENOMEM; 8988 func = kcalloc(env->subprog_cnt, sizeof(prog), GFP_KERNEL); 8989 if (!func) 8990 goto out_undo_insn; 8991 8992 for (i = 0; i < env->subprog_cnt; i++) { 8993 subprog_start = subprog_end; 8994 subprog_end = env->subprog_info[i + 1].start; 8995 8996 len = subprog_end - subprog_start; 8997 /* BPF_PROG_RUN doesn't call subprogs directly, 8998 * hence main prog stats include the runtime of subprogs. 
8999 * subprogs don't have IDs and not reachable via prog_get_next_id 9000 * func[i]->aux->stats will never be accessed and stays NULL 9001 */ 9002 func[i] = bpf_prog_alloc_no_stats(bpf_prog_size(len), GFP_USER); 9003 if (!func[i]) 9004 goto out_free; 9005 memcpy(func[i]->insnsi, &prog->insnsi[subprog_start], 9006 len * sizeof(struct bpf_insn)); 9007 func[i]->type = prog->type; 9008 func[i]->len = len; 9009 if (bpf_prog_calc_tag(func[i])) 9010 goto out_free; 9011 func[i]->is_func = 1; 9012 func[i]->aux->func_idx = i; 9013 /* the btf and func_info will be freed only at prog->aux */ 9014 func[i]->aux->btf = prog->aux->btf; 9015 func[i]->aux->func_info = prog->aux->func_info; 9016 9017 /* Use bpf_prog_F_tag to indicate functions in stack traces. 9018 * Long term would need debug info to populate names 9019 */ 9020 func[i]->aux->name[0] = 'F'; 9021 func[i]->aux->stack_depth = env->subprog_info[i].stack_depth; 9022 func[i]->jit_requested = 1; 9023 func[i]->aux->linfo = prog->aux->linfo; 9024 func[i]->aux->nr_linfo = prog->aux->nr_linfo; 9025 func[i]->aux->jited_linfo = prog->aux->jited_linfo; 9026 func[i]->aux->linfo_idx = env->subprog_info[i].linfo_idx; 9027 func[i] = bpf_int_jit_compile(func[i]); 9028 if (!func[i]->jited) { 9029 err = -ENOTSUPP; 9030 goto out_free; 9031 } 9032 cond_resched(); 9033 } 9034 /* at this point all bpf functions were successfully JITed 9035 * now populate all bpf_calls with correct addresses and 9036 * run last pass of JIT 9037 */ 9038 for (i = 0; i < env->subprog_cnt; i++) { 9039 insn = func[i]->insnsi; 9040 for (j = 0; j < func[i]->len; j++, insn++) { 9041 if (insn->code != (BPF_JMP | BPF_CALL) || 9042 insn->src_reg != BPF_PSEUDO_CALL) 9043 continue; 9044 subprog = insn->off; 9045 insn->imm = BPF_CAST_CALL(func[subprog]->bpf_func) - 9046 __bpf_call_base; 9047 } 9048 9049 /* we use the aux data to keep a list of the start addresses 9050 * of the JITed images for each function in the program 9051 * 9052 * for some architectures, such as powerpc64, the imm field 9053 * might not be large enough to hold the offset of the start 9054 * address of the callee's JITed image from __bpf_call_base 9055 * 9056 * in such cases, we can lookup the start address of a callee 9057 * by using its subprog id, available from the off field of 9058 * the call instruction, as an index for this list 9059 */ 9060 func[i]->aux->func = func; 9061 func[i]->aux->func_cnt = env->subprog_cnt; 9062 } 9063 for (i = 0; i < env->subprog_cnt; i++) { 9064 old_bpf_func = func[i]->bpf_func; 9065 tmp = bpf_int_jit_compile(func[i]); 9066 if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) { 9067 verbose(env, "JIT doesn't support bpf-to-bpf calls\n"); 9068 err = -ENOTSUPP; 9069 goto out_free; 9070 } 9071 cond_resched(); 9072 } 9073 9074 /* finally lock prog and jit images for all functions and 9075 * populate kallsysm 9076 */ 9077 for (i = 0; i < env->subprog_cnt; i++) { 9078 bpf_prog_lock_ro(func[i]); 9079 bpf_prog_kallsyms_add(func[i]); 9080 } 9081 9082 /* Last step: make now unused interpreter insns from main 9083 * prog consistent for later dump requests, so they can 9084 * later look the same as if they were interpreted only. 
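 *
 * (Concretely, the loop below rewrites each pseudo call so that insn->imm
 * identifies the callee subprog while insn->off carries the original
 * relative offset that was saved in call_imm.)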
9085 */ 9086 for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) { 9087 if (insn->code != (BPF_JMP | BPF_CALL) || 9088 insn->src_reg != BPF_PSEUDO_CALL) 9089 continue; 9090 insn->off = env->insn_aux_data[i].call_imm; 9091 subprog = find_subprog(env, i + insn->off + 1); 9092 insn->imm = subprog; 9093 } 9094 9095 prog->jited = 1; 9096 prog->bpf_func = func[0]->bpf_func; 9097 prog->aux->func = func; 9098 prog->aux->func_cnt = env->subprog_cnt; 9099 bpf_prog_free_unused_jited_linfo(prog); 9100 return 0; 9101 out_free: 9102 for (i = 0; i < env->subprog_cnt; i++) 9103 if (func[i]) 9104 bpf_jit_free(func[i]); 9105 kfree(func); 9106 out_undo_insn: 9107 /* cleanup main prog to be interpreted */ 9108 prog->jit_requested = 0; 9109 for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) { 9110 if (insn->code != (BPF_JMP | BPF_CALL) || 9111 insn->src_reg != BPF_PSEUDO_CALL) 9112 continue; 9113 insn->off = 0; 9114 insn->imm = env->insn_aux_data[i].call_imm; 9115 } 9116 bpf_prog_free_jited_linfo(prog); 9117 return err; 9118 } 9119 9120 static int fixup_call_args(struct bpf_verifier_env *env) 9121 { 9122 #ifndef CONFIG_BPF_JIT_ALWAYS_ON 9123 struct bpf_prog *prog = env->prog; 9124 struct bpf_insn *insn = prog->insnsi; 9125 int i, depth; 9126 #endif 9127 int err = 0; 9128 9129 if (env->prog->jit_requested && 9130 !bpf_prog_is_dev_bound(env->prog->aux)) { 9131 err = jit_subprogs(env); 9132 if (err == 0) 9133 return 0; 9134 if (err == -EFAULT) 9135 return err; 9136 } 9137 #ifndef CONFIG_BPF_JIT_ALWAYS_ON 9138 for (i = 0; i < prog->len; i++, insn++) { 9139 if (insn->code != (BPF_JMP | BPF_CALL) || 9140 insn->src_reg != BPF_PSEUDO_CALL) 9141 continue; 9142 depth = get_callee_stack_depth(env, insn, i); 9143 if (depth < 0) 9144 return depth; 9145 bpf_patch_call_args(insn, depth); 9146 } 9147 err = 0; 9148 #endif 9149 return err; 9150 } 9151 9152 /* fixup insn->imm field of bpf_call instructions 9153 * and inline eligible helpers as explicit sequence of BPF instructions 9154 * 9155 * this function is called after eBPF program passed verification 9156 */ 9157 static int fixup_bpf_calls(struct bpf_verifier_env *env) 9158 { 9159 struct bpf_prog *prog = env->prog; 9160 bool expect_blinding = bpf_jit_blinding_enabled(prog); 9161 struct bpf_insn *insn = prog->insnsi; 9162 const struct bpf_func_proto *fn; 9163 const int insn_cnt = prog->len; 9164 const struct bpf_map_ops *ops; 9165 struct bpf_insn_aux_data *aux; 9166 struct bpf_insn insn_buf[16]; 9167 struct bpf_prog *new_prog; 9168 struct bpf_map *map_ptr; 9169 int i, ret, cnt, delta = 0; 9170 9171 for (i = 0; i < insn_cnt; i++, insn++) { 9172 if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) || 9173 insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) || 9174 insn->code == (BPF_ALU | BPF_MOD | BPF_X) || 9175 insn->code == (BPF_ALU | BPF_DIV | BPF_X)) { 9176 bool is64 = BPF_CLASS(insn->code) == BPF_ALU64; 9177 struct bpf_insn mask_and_div[] = { 9178 BPF_MOV32_REG(insn->src_reg, insn->src_reg), 9179 /* Rx div 0 -> 0 */ 9180 BPF_JMP_IMM(BPF_JNE, insn->src_reg, 0, 2), 9181 BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg), 9182 BPF_JMP_IMM(BPF_JA, 0, 0, 1), 9183 *insn, 9184 }; 9185 struct bpf_insn mask_and_mod[] = { 9186 BPF_MOV32_REG(insn->src_reg, insn->src_reg), 9187 /* Rx mod 0 -> Rx */ 9188 BPF_JMP_IMM(BPF_JEQ, insn->src_reg, 0, 1), 9189 *insn, 9190 }; 9191 struct bpf_insn *patchlet; 9192 9193 if (insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) || 9194 insn->code == (BPF_ALU | BPF_DIV | BPF_X)) { 9195 patchlet = mask_and_div + (is64 ? 
1 : 0); 9196 cnt = ARRAY_SIZE(mask_and_div) - (is64 ? 1 : 0); 9197 } else { 9198 patchlet = mask_and_mod + (is64 ? 1 : 0); 9199 cnt = ARRAY_SIZE(mask_and_mod) - (is64 ? 1 : 0); 9200 } 9201 9202 new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt); 9203 if (!new_prog) 9204 return -ENOMEM; 9205 9206 delta += cnt - 1; 9207 env->prog = prog = new_prog; 9208 insn = new_prog->insnsi + i + delta; 9209 continue; 9210 } 9211 9212 if (BPF_CLASS(insn->code) == BPF_LD && 9213 (BPF_MODE(insn->code) == BPF_ABS || 9214 BPF_MODE(insn->code) == BPF_IND)) { 9215 cnt = env->ops->gen_ld_abs(insn, insn_buf); 9216 if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) { 9217 verbose(env, "bpf verifier is misconfigured\n"); 9218 return -EINVAL; 9219 } 9220 9221 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); 9222 if (!new_prog) 9223 return -ENOMEM; 9224 9225 delta += cnt - 1; 9226 env->prog = prog = new_prog; 9227 insn = new_prog->insnsi + i + delta; 9228 continue; 9229 } 9230 9231 if (insn->code == (BPF_ALU64 | BPF_ADD | BPF_X) || 9232 insn->code == (BPF_ALU64 | BPF_SUB | BPF_X)) { 9233 const u8 code_add = BPF_ALU64 | BPF_ADD | BPF_X; 9234 const u8 code_sub = BPF_ALU64 | BPF_SUB | BPF_X; 9235 struct bpf_insn insn_buf[16]; 9236 struct bpf_insn *patch = &insn_buf[0]; 9237 bool issrc, isneg; 9238 u32 off_reg; 9239 9240 aux = &env->insn_aux_data[i + delta]; 9241 if (!aux->alu_state || 9242 aux->alu_state == BPF_ALU_NON_POINTER) 9243 continue; 9244 9245 isneg = aux->alu_state & BPF_ALU_NEG_VALUE; 9246 issrc = (aux->alu_state & BPF_ALU_SANITIZE) == 9247 BPF_ALU_SANITIZE_SRC; 9248 9249 off_reg = issrc ? insn->src_reg : insn->dst_reg; 9250 if (isneg) 9251 *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1); 9252 *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit - 1); 9253 *patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg); 9254 *patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg); 9255 *patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0); 9256 *patch++ = BPF_ALU64_IMM(BPF_ARSH, BPF_REG_AX, 63); 9257 if (issrc) { 9258 *patch++ = BPF_ALU64_REG(BPF_AND, BPF_REG_AX, 9259 off_reg); 9260 insn->src_reg = BPF_REG_AX; 9261 } else { 9262 *patch++ = BPF_ALU64_REG(BPF_AND, off_reg, 9263 BPF_REG_AX); 9264 } 9265 if (isneg) 9266 insn->code = insn->code == code_add ? 9267 code_sub : code_add; 9268 *patch++ = *insn; 9269 if (issrc && isneg) 9270 *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1); 9271 cnt = patch - insn_buf; 9272 9273 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); 9274 if (!new_prog) 9275 return -ENOMEM; 9276 9277 delta += cnt - 1; 9278 env->prog = prog = new_prog; 9279 insn = new_prog->insnsi + i + delta; 9280 continue; 9281 } 9282 9283 if (insn->code != (BPF_JMP | BPF_CALL)) 9284 continue; 9285 if (insn->src_reg == BPF_PSEUDO_CALL) 9286 continue; 9287 9288 if (insn->imm == BPF_FUNC_get_route_realm) 9289 prog->dst_needed = 1; 9290 if (insn->imm == BPF_FUNC_get_prandom_u32) 9291 bpf_user_rnd_init_once(); 9292 if (insn->imm == BPF_FUNC_override_return) 9293 prog->kprobe_override = 1; 9294 if (insn->imm == BPF_FUNC_tail_call) { 9295 /* If we tail call into other programs, we 9296 * cannot make any assumptions since they can 9297 * be replaced dynamically during runtime in 9298 * the program array. 
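 *
 * Roughly, on the BPF program side this is (illustrative only; the map
 * and index names are made up):
 *   bpf_tail_call(ctx, &jmp_table, idx);
 * where jmp_table is a BPF_MAP_TYPE_PROG_ARRAY whose slots user space
 * may rewrite at any time, so the verifier conservatively assumes the
 * worst-case stack depth and packet offset for whatever ends up being
 * called, as done right below.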
9299 */ 9300 prog->cb_access = 1; 9301 env->prog->aux->stack_depth = MAX_BPF_STACK; 9302 env->prog->aux->max_pkt_offset = MAX_PACKET_OFF; 9303 9304 /* mark bpf_tail_call as different opcode to avoid 9305 * conditional branch in the interpreter for every normal 9306 * call and to prevent accidental JITing by JIT compiler 9307 * that doesn't support bpf_tail_call yet 9308 */ 9309 insn->imm = 0; 9310 insn->code = BPF_JMP | BPF_TAIL_CALL; 9311 9312 aux = &env->insn_aux_data[i + delta]; 9313 if (env->allow_ptr_leaks && !expect_blinding && 9314 prog->jit_requested && 9315 !bpf_map_key_poisoned(aux) && 9316 !bpf_map_ptr_poisoned(aux) && 9317 !bpf_map_ptr_unpriv(aux)) { 9318 struct bpf_jit_poke_descriptor desc = { 9319 .reason = BPF_POKE_REASON_TAIL_CALL, 9320 .tail_call.map = BPF_MAP_PTR(aux->map_ptr_state), 9321 .tail_call.key = bpf_map_key_immediate(aux), 9322 }; 9323 9324 ret = bpf_jit_add_poke_descriptor(prog, &desc); 9325 if (ret < 0) { 9326 verbose(env, "adding tail call poke descriptor failed\n"); 9327 return ret; 9328 } 9329 9330 insn->imm = ret + 1; 9331 continue; 9332 } 9333 9334 if (!bpf_map_ptr_unpriv(aux)) 9335 continue; 9336 9337 /* instead of changing every JIT dealing with tail_call 9338 * emit two extra insns: 9339 * if (index >= max_entries) goto out; 9340 * index &= array->index_mask; 9341 * to avoid out-of-bounds cpu speculation 9342 */ 9343 if (bpf_map_ptr_poisoned(aux)) { 9344 verbose(env, "tail_call abusing map_ptr\n"); 9345 return -EINVAL; 9346 } 9347 9348 map_ptr = BPF_MAP_PTR(aux->map_ptr_state); 9349 insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3, 9350 map_ptr->max_entries, 2); 9351 insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3, 9352 container_of(map_ptr, 9353 struct bpf_array, 9354 map)->index_mask); 9355 insn_buf[2] = *insn; 9356 cnt = 3; 9357 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); 9358 if (!new_prog) 9359 return -ENOMEM; 9360 9361 delta += cnt - 1; 9362 env->prog = prog = new_prog; 9363 insn = new_prog->insnsi + i + delta; 9364 continue; 9365 } 9366 9367 /* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup 9368 * and other inlining handlers are currently limited to 64 bit 9369 * only.
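 *
 * As a rough sketch of what such inlining produces (simplified; the
 * exact sequence lives in the map's ->map_gen_lookup() callback, e.g.
 * array_map_gen_lookup()): a bpf_map_lookup_elem() call on an array
 * map is replaced by insns that load the index from the key, bounds
 * check it against max_entries (and mask it), then compute
 *   value = array->value + index * round_up(value_size, 8)
 * or set R0 = NULL when the index is out of range, avoiding the helper
 * call entirely.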
9370 */ 9371 if (prog->jit_requested && BITS_PER_LONG == 64 && 9372 (insn->imm == BPF_FUNC_map_lookup_elem || 9373 insn->imm == BPF_FUNC_map_update_elem || 9374 insn->imm == BPF_FUNC_map_delete_elem || 9375 insn->imm == BPF_FUNC_map_push_elem || 9376 insn->imm == BPF_FUNC_map_pop_elem || 9377 insn->imm == BPF_FUNC_map_peek_elem)) { 9378 aux = &env->insn_aux_data[i + delta]; 9379 if (bpf_map_ptr_poisoned(aux)) 9380 goto patch_call_imm; 9381 9382 map_ptr = BPF_MAP_PTR(aux->map_ptr_state); 9383 ops = map_ptr->ops; 9384 if (insn->imm == BPF_FUNC_map_lookup_elem && 9385 ops->map_gen_lookup) { 9386 cnt = ops->map_gen_lookup(map_ptr, insn_buf); 9387 if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) { 9388 verbose(env, "bpf verifier is misconfigured\n"); 9389 return -EINVAL; 9390 } 9391 9392 new_prog = bpf_patch_insn_data(env, i + delta, 9393 insn_buf, cnt); 9394 if (!new_prog) 9395 return -ENOMEM; 9396 9397 delta += cnt - 1; 9398 env->prog = prog = new_prog; 9399 insn = new_prog->insnsi + i + delta; 9400 continue; 9401 } 9402 9403 BUILD_BUG_ON(!__same_type(ops->map_lookup_elem, 9404 (void *(*)(struct bpf_map *map, void *key))NULL)); 9405 BUILD_BUG_ON(!__same_type(ops->map_delete_elem, 9406 (int (*)(struct bpf_map *map, void *key))NULL)); 9407 BUILD_BUG_ON(!__same_type(ops->map_update_elem, 9408 (int (*)(struct bpf_map *map, void *key, void *value, 9409 u64 flags))NULL)); 9410 BUILD_BUG_ON(!__same_type(ops->map_push_elem, 9411 (int (*)(struct bpf_map *map, void *value, 9412 u64 flags))NULL)); 9413 BUILD_BUG_ON(!__same_type(ops->map_pop_elem, 9414 (int (*)(struct bpf_map *map, void *value))NULL)); 9415 BUILD_BUG_ON(!__same_type(ops->map_peek_elem, 9416 (int (*)(struct bpf_map *map, void *value))NULL)); 9417 9418 switch (insn->imm) { 9419 case BPF_FUNC_map_lookup_elem: 9420 insn->imm = BPF_CAST_CALL(ops->map_lookup_elem) - 9421 __bpf_call_base; 9422 continue; 9423 case BPF_FUNC_map_update_elem: 9424 insn->imm = BPF_CAST_CALL(ops->map_update_elem) - 9425 __bpf_call_base; 9426 continue; 9427 case BPF_FUNC_map_delete_elem: 9428 insn->imm = BPF_CAST_CALL(ops->map_delete_elem) - 9429 __bpf_call_base; 9430 continue; 9431 case BPF_FUNC_map_push_elem: 9432 insn->imm = BPF_CAST_CALL(ops->map_push_elem) - 9433 __bpf_call_base; 9434 continue; 9435 case BPF_FUNC_map_pop_elem: 9436 insn->imm = BPF_CAST_CALL(ops->map_pop_elem) - 9437 __bpf_call_base; 9438 continue; 9439 case BPF_FUNC_map_peek_elem: 9440 insn->imm = BPF_CAST_CALL(ops->map_peek_elem) - 9441 __bpf_call_base; 9442 continue; 9443 } 9444 9445 goto patch_call_imm; 9446 } 9447 9448 patch_call_imm: 9449 fn = env->ops->get_func_proto(insn->imm, env->prog); 9450 /* all functions that have prototype and verifier allowed 9451 * programs to call them, must be real in-kernel functions 9452 */ 9453 if (!fn->func) { 9454 verbose(env, 9455 "kernel subsystem misconfigured func %s#%d\n", 9456 func_id_name(insn->imm), insn->imm); 9457 return -EFAULT; 9458 } 9459 insn->imm = fn->func - __bpf_call_base; 9460 } 9461 9462 /* Since poke tab is now finalized, publish aux to tracker. 
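 *
 * Registering prog->aux with each tail_call map lets the map call back
 * into the JIT via ->map_poke_run() whenever one of its slots changes,
 * e.g. when user space does (illustrative, map fd and index made up):
 *   bpf_map_update_elem(jmp_table_fd, &idx, &new_prog_fd, BPF_ANY);
 * so that the direct jump emitted for the poked tail call above can be
 * re-patched to point at the new target program.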
*/ 9463 for (i = 0; i < prog->aux->size_poke_tab; i++) { 9464 map_ptr = prog->aux->poke_tab[i].tail_call.map; 9465 if (!map_ptr->ops->map_poke_track || 9466 !map_ptr->ops->map_poke_untrack || 9467 !map_ptr->ops->map_poke_run) { 9468 verbose(env, "bpf verifier is misconfigured\n"); 9469 return -EINVAL; 9470 } 9471 9472 ret = map_ptr->ops->map_poke_track(map_ptr, prog->aux); 9473 if (ret < 0) { 9474 verbose(env, "tracking tail call prog failed\n"); 9475 return ret; 9476 } 9477 } 9478 9479 return 0; 9480 } 9481 9482 static void free_states(struct bpf_verifier_env *env) 9483 { 9484 struct bpf_verifier_state_list *sl, *sln; 9485 int i; 9486 9487 sl = env->free_list; 9488 while (sl) { 9489 sln = sl->next; 9490 free_verifier_state(&sl->state, false); 9491 kfree(sl); 9492 sl = sln; 9493 } 9494 env->free_list = NULL; 9495 9496 if (!env->explored_states) 9497 return; 9498 9499 for (i = 0; i < state_htab_size(env); i++) { 9500 sl = env->explored_states[i]; 9501 9502 while (sl) { 9503 sln = sl->next; 9504 free_verifier_state(&sl->state, false); 9505 kfree(sl); 9506 sl = sln; 9507 } 9508 env->explored_states[i] = NULL; 9509 } 9510 } 9511 9512 /* The verifier is using insn_aux_data[] to store temporary data during 9513 * verification and to store information for passes that run after the 9514 * verification like dead code sanitization. do_check_common() for subprogram N 9515 * may analyze many other subprograms. sanitize_insn_aux_data() clears all 9516 * temporary data after do_check_common() finds that subprogram N cannot be 9517 * verified independently. pass_cnt counts the number of times 9518 * do_check_common() was run and insn->aux->seen tells the pass number 9519 * insn_aux_data was touched. These variables are compared to clear temporary 9520 * data from failed pass. For testing and experiments do_check_common() can be 9521 * run multiple times even when prior attempt to verify is unsuccessful. 
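 *
 * For example, if pass_cnt is 3 when verification of subprog N fails,
 * only the aux entries for load/store insns whose seen == 3 (i.e.
 * touched during that failed pass) are cleared below; entries recorded
 * by earlier successful passes keep their data.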
9522 */ 9523 static void sanitize_insn_aux_data(struct bpf_verifier_env *env) 9524 { 9525 struct bpf_insn *insn = env->prog->insnsi; 9526 struct bpf_insn_aux_data *aux; 9527 int i, class; 9528 9529 for (i = 0; i < env->prog->len; i++) { 9530 class = BPF_CLASS(insn[i].code); 9531 if (class != BPF_LDX && class != BPF_STX) 9532 continue; 9533 aux = &env->insn_aux_data[i]; 9534 if (aux->seen != env->pass_cnt) 9535 continue; 9536 memset(aux, 0, offsetof(typeof(*aux), orig_idx)); 9537 } 9538 } 9539 9540 static int do_check_common(struct bpf_verifier_env *env, int subprog) 9541 { 9542 struct bpf_verifier_state *state; 9543 struct bpf_reg_state *regs; 9544 int ret, i; 9545 9546 env->prev_linfo = NULL; 9547 env->pass_cnt++; 9548 9549 state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL); 9550 if (!state) 9551 return -ENOMEM; 9552 state->curframe = 0; 9553 state->speculative = false; 9554 state->branches = 1; 9555 state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL); 9556 if (!state->frame[0]) { 9557 kfree(state); 9558 return -ENOMEM; 9559 } 9560 env->cur_state = state; 9561 init_func_state(env, state->frame[0], 9562 BPF_MAIN_FUNC /* callsite */, 9563 0 /* frameno */, 9564 subprog); 9565 9566 regs = state->frame[state->curframe]->regs; 9567 if (subprog) { 9568 ret = btf_prepare_func_args(env, subprog, regs); 9569 if (ret) 9570 goto out; 9571 for (i = BPF_REG_1; i <= BPF_REG_5; i++) { 9572 if (regs[i].type == PTR_TO_CTX) 9573 mark_reg_known_zero(env, regs, i); 9574 else if (regs[i].type == SCALAR_VALUE) 9575 mark_reg_unknown(env, regs, i); 9576 } 9577 } else { 9578 /* 1st arg to a function */ 9579 regs[BPF_REG_1].type = PTR_TO_CTX; 9580 mark_reg_known_zero(env, regs, BPF_REG_1); 9581 ret = btf_check_func_arg_match(env, subprog, regs); 9582 if (ret == -EFAULT) 9583 /* unlikely verifier bug. abort. 9584 * ret == 0 and ret < 0 are sadly acceptable for 9585 * main() function due to backward compatibility. 9586 * Like socket filter program may be written as: 9587 * int bpf_prog(struct pt_regs *ctx) 9588 * and never dereference that ctx in the program. 9589 * 'struct pt_regs' is a type mismatch for socket 9590 * filter that should be using 'struct __sk_buff'. 9591 */ 9592 goto out; 9593 } 9594 9595 ret = do_check(env); 9596 out: 9597 free_verifier_state(env->cur_state, true); 9598 env->cur_state = NULL; 9599 while (!pop_stack(env, NULL, NULL)); 9600 free_states(env); 9601 if (ret) 9602 /* clean aux data in case subprog was rejected */ 9603 sanitize_insn_aux_data(env); 9604 return ret; 9605 } 9606 9607 /* Verify all global functions in a BPF program one by one based on their BTF. 9608 * All global functions must pass verification. Otherwise the whole program is rejected. 9609 * Consider: 9610 * int bar(int); 9611 * int foo(int f) 9612 * { 9613 * return bar(f); 9614 * } 9615 * int bar(int b) 9616 * { 9617 * ... 9618 * } 9619 * foo() will be verified first for R1=any_scalar_value. During verification it 9620 * will be assumed that bar() already verified successfully and call to bar() 9621 * from foo() will be checked for type match only. Later bar() will be verified 9622 * independently to check that it's safe for R1=any_scalar_value. 
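 *
 * In the sketch above bar() takes a plain int, so it is re-verified
 * with R1 set to an unknown SCALAR_VALUE; had its BTF prototype taken
 * a context pointer instead, do_check_common() above would seed R1 as
 * PTR_TO_CTX (see the btf_prepare_func_args() handling there).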
9623 */ 9624 static int do_check_subprogs(struct bpf_verifier_env *env) 9625 { 9626 struct bpf_prog_aux *aux = env->prog->aux; 9627 int i, ret; 9628 9629 if (!aux->func_info) 9630 return 0; 9631 9632 for (i = 1; i < env->subprog_cnt; i++) { 9633 if (aux->func_info_aux[i].linkage != BTF_FUNC_GLOBAL) 9634 continue; 9635 env->insn_idx = env->subprog_info[i].start; 9636 WARN_ON_ONCE(env->insn_idx == 0); 9637 ret = do_check_common(env, i); 9638 if (ret) { 9639 return ret; 9640 } else if (env->log.level & BPF_LOG_LEVEL) { 9641 verbose(env, 9642 "Func#%d is safe for any args that match its prototype\n", 9643 i); 9644 } 9645 } 9646 return 0; 9647 } 9648 9649 static int do_check_main(struct bpf_verifier_env *env) 9650 { 9651 int ret; 9652 9653 env->insn_idx = 0; 9654 ret = do_check_common(env, 0); 9655 if (!ret) 9656 env->prog->aux->stack_depth = env->subprog_info[0].stack_depth; 9657 return ret; 9658 } 9659 9660 9661 static void print_verification_stats(struct bpf_verifier_env *env) 9662 { 9663 int i; 9664 9665 if (env->log.level & BPF_LOG_STATS) { 9666 verbose(env, "verification time %lld usec\n", 9667 div_u64(env->verification_time, 1000)); 9668 verbose(env, "stack depth "); 9669 for (i = 0; i < env->subprog_cnt; i++) { 9670 u32 depth = env->subprog_info[i].stack_depth; 9671 9672 verbose(env, "%d", depth); 9673 if (i + 1 < env->subprog_cnt) 9674 verbose(env, "+"); 9675 } 9676 verbose(env, "\n"); 9677 } 9678 verbose(env, "processed %d insns (limit %d) max_states_per_insn %d " 9679 "total_states %d peak_states %d mark_read %d\n", 9680 env->insn_processed, BPF_COMPLEXITY_LIMIT_INSNS, 9681 env->max_states_per_insn, env->total_states, 9682 env->peak_states, env->longest_mark_read_walk); 9683 } 9684 9685 static int check_struct_ops_btf_id(struct bpf_verifier_env *env) 9686 { 9687 const struct btf_type *t, *func_proto; 9688 const struct bpf_struct_ops *st_ops; 9689 const struct btf_member *member; 9690 struct bpf_prog *prog = env->prog; 9691 u32 btf_id, member_idx; 9692 const char *mname; 9693 9694 btf_id = prog->aux->attach_btf_id; 9695 st_ops = bpf_struct_ops_find(btf_id); 9696 if (!st_ops) { 9697 verbose(env, "attach_btf_id %u is not a supported struct\n", 9698 btf_id); 9699 return -ENOTSUPP; 9700 } 9701 9702 t = st_ops->type; 9703 member_idx = prog->expected_attach_type; 9704 if (member_idx >= btf_type_vlen(t)) { 9705 verbose(env, "attach to invalid member idx %u of struct %s\n", 9706 member_idx, st_ops->name); 9707 return -EINVAL; 9708 } 9709 9710 member = &btf_type_member(t)[member_idx]; 9711 mname = btf_name_by_offset(btf_vmlinux, member->name_off); 9712 func_proto = btf_type_resolve_func_ptr(btf_vmlinux, member->type, 9713 NULL); 9714 if (!func_proto) { 9715 verbose(env, "attach to invalid member %s(@idx %u) of struct %s\n", 9716 mname, member_idx, st_ops->name); 9717 return -EINVAL; 9718 } 9719 9720 if (st_ops->check_member) { 9721 int err = st_ops->check_member(t, member); 9722 9723 if (err) { 9724 verbose(env, "attach to unsupported member %s of struct %s\n", 9725 mname, st_ops->name); 9726 return err; 9727 } 9728 } 9729 9730 prog->aux->attach_func_proto = func_proto; 9731 prog->aux->attach_func_name = mname; 9732 env->ops = st_ops->verifier_ops; 9733 9734 return 0; 9735 } 9736 9737 static int check_attach_btf_id(struct bpf_verifier_env *env) 9738 { 9739 struct bpf_prog *prog = env->prog; 9740 struct bpf_prog *tgt_prog = prog->aux->linked_prog; 9741 u32 btf_id = prog->aux->attach_btf_id; 9742 const char prefix[] = "btf_trace_"; 9743 int ret = 0, subprog = -1, i; 9744 struct bpf_trampoline 
*tr; 9745 const struct btf_type *t; 9746 bool conservative = true; 9747 const char *tname; 9748 struct btf *btf; 9749 long addr; 9750 u64 key; 9751 9752 if (prog->type == BPF_PROG_TYPE_STRUCT_OPS) 9753 return check_struct_ops_btf_id(env); 9754 9755 if (prog->type != BPF_PROG_TYPE_TRACING) 9756 return 0; 9757 9758 if (!btf_id) { 9759 verbose(env, "Tracing programs must provide btf_id\n"); 9760 return -EINVAL; 9761 } 9762 btf = bpf_prog_get_target_btf(prog); 9763 if (!btf) { 9764 verbose(env, 9765 "FENTRY/FEXIT program can only be attached to another program annotated with BTF\n"); 9766 return -EINVAL; 9767 } 9768 t = btf_type_by_id(btf, btf_id); 9769 if (!t) { 9770 verbose(env, "attach_btf_id %u is invalid\n", btf_id); 9771 return -EINVAL; 9772 } 9773 tname = btf_name_by_offset(btf, t->name_off); 9774 if (!tname) { 9775 verbose(env, "attach_btf_id %u doesn't have a name\n", btf_id); 9776 return -EINVAL; 9777 } 9778 if (tgt_prog) { 9779 struct bpf_prog_aux *aux = tgt_prog->aux; 9780 9781 for (i = 0; i < aux->func_info_cnt; i++) 9782 if (aux->func_info[i].type_id == btf_id) { 9783 subprog = i; 9784 break; 9785 } 9786 if (subprog == -1) { 9787 verbose(env, "Subprog %s doesn't exist\n", tname); 9788 return -EINVAL; 9789 } 9790 conservative = aux->func_info_aux[subprog].unreliable; 9791 key = ((u64)aux->id) << 32 | btf_id; 9792 } else { 9793 key = btf_id; 9794 } 9795 9796 switch (prog->expected_attach_type) { 9797 case BPF_TRACE_RAW_TP: 9798 if (tgt_prog) { 9799 verbose(env, 9800 "Only FENTRY/FEXIT progs are attachable to another BPF prog\n"); 9801 return -EINVAL; 9802 } 9803 if (!btf_type_is_typedef(t)) { 9804 verbose(env, "attach_btf_id %u is not a typedef\n", 9805 btf_id); 9806 return -EINVAL; 9807 } 9808 if (strncmp(prefix, tname, sizeof(prefix) - 1)) { 9809 verbose(env, "attach_btf_id %u points to wrong type name %s\n", 9810 btf_id, tname); 9811 return -EINVAL; 9812 } 9813 tname += sizeof(prefix) - 1; 9814 t = btf_type_by_id(btf, t->type); 9815 if (!btf_type_is_ptr(t)) 9816 /* should never happen in valid vmlinux build */ 9817 return -EINVAL; 9818 t = btf_type_by_id(btf, t->type); 9819 if (!btf_type_is_func_proto(t)) 9820 /* should never happen in valid vmlinux build */ 9821 return -EINVAL; 9822 9823 /* remember two read only pointers that are valid for 9824 * the life time of the kernel 9825 */ 9826 prog->aux->attach_func_name = tname; 9827 prog->aux->attach_func_proto = t; 9828 prog->aux->attach_btf_trace = true; 9829 return 0; 9830 case BPF_TRACE_FENTRY: 9831 case BPF_TRACE_FEXIT: 9832 if (!btf_type_is_func(t)) { 9833 verbose(env, "attach_btf_id %u is not a function\n", 9834 btf_id); 9835 return -EINVAL; 9836 } 9837 t = btf_type_by_id(btf, t->type); 9838 if (!btf_type_is_func_proto(t)) 9839 return -EINVAL; 9840 tr = bpf_trampoline_lookup(key); 9841 if (!tr) 9842 return -ENOMEM; 9843 prog->aux->attach_func_name = tname; 9844 /* t is either vmlinux type or another program's type */ 9845 prog->aux->attach_func_proto = t; 9846 mutex_lock(&tr->mutex); 9847 if (tr->func.addr) { 9848 prog->aux->trampoline = tr; 9849 goto out; 9850 } 9851 if (tgt_prog && conservative) { 9852 prog->aux->attach_func_proto = NULL; 9853 t = NULL; 9854 } 9855 ret = btf_distill_func_proto(&env->log, btf, t, 9856 tname, &tr->func.model); 9857 if (ret < 0) 9858 goto out; 9859 if (tgt_prog) { 9860 if (!tgt_prog->jited) { 9861 /* for now */ 9862 verbose(env, "Can trace only JITed BPF progs\n"); 9863 ret = -EINVAL; 9864 goto out; 9865 } 9866 if (tgt_prog->type == BPF_PROG_TYPE_TRACING) { 9867 /* prevent cycles */ 9868 
verbose(env, "Cannot recursively attach\n"); 9869 ret = -EINVAL; 9870 goto out; 9871 } 9872 if (subprog == 0) 9873 addr = (long) tgt_prog->bpf_func; 9874 else 9875 addr = (long) tgt_prog->aux->func[subprog]->bpf_func; 9876 } else { 9877 addr = kallsyms_lookup_name(tname); 9878 if (!addr) { 9879 verbose(env, 9880 "The address of function %s cannot be found\n", 9881 tname); 9882 ret = -ENOENT; 9883 goto out; 9884 } 9885 } 9886 tr->func.addr = (void *)addr; 9887 prog->aux->trampoline = tr; 9888 out: 9889 mutex_unlock(&tr->mutex); 9890 if (ret) 9891 bpf_trampoline_put(tr); 9892 return ret; 9893 default: 9894 return -EINVAL; 9895 } 9896 } 9897 9898 int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, 9899 union bpf_attr __user *uattr) 9900 { 9901 u64 start_time = ktime_get_ns(); 9902 struct bpf_verifier_env *env; 9903 struct bpf_verifier_log *log; 9904 int i, len, ret = -EINVAL; 9905 bool is_priv; 9906 9907 /* no program is valid */ 9908 if (ARRAY_SIZE(bpf_verifier_ops) == 0) 9909 return -EINVAL; 9910 9911 /* 'struct bpf_verifier_env' can be global, but since it's not small, 9912 * allocate/free it every time bpf_check() is called 9913 */ 9914 env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL); 9915 if (!env) 9916 return -ENOMEM; 9917 log = &env->log; 9918 9919 len = (*prog)->len; 9920 env->insn_aux_data = 9921 vzalloc(array_size(sizeof(struct bpf_insn_aux_data), len)); 9922 ret = -ENOMEM; 9923 if (!env->insn_aux_data) 9924 goto err_free_env; 9925 for (i = 0; i < len; i++) 9926 env->insn_aux_data[i].orig_idx = i; 9927 env->prog = *prog; 9928 env->ops = bpf_verifier_ops[env->prog->type]; 9929 is_priv = capable(CAP_SYS_ADMIN); 9930 9931 if (!btf_vmlinux && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) { 9932 mutex_lock(&bpf_verifier_lock); 9933 if (!btf_vmlinux) 9934 btf_vmlinux = btf_parse_vmlinux(); 9935 mutex_unlock(&bpf_verifier_lock); 9936 } 9937 9938 /* grab the mutex to protect few globals used by verifier */ 9939 if (!is_priv) 9940 mutex_lock(&bpf_verifier_lock); 9941 9942 if (attr->log_level || attr->log_buf || attr->log_size) { 9943 /* user requested verbose verifier output 9944 * and supplied buffer to store the verification trace 9945 */ 9946 log->level = attr->log_level; 9947 log->ubuf = (char __user *) (unsigned long) attr->log_buf; 9948 log->len_total = attr->log_size; 9949 9950 ret = -EINVAL; 9951 /* log attributes have to be sane */ 9952 if (log->len_total < 128 || log->len_total > UINT_MAX >> 2 || 9953 !log->level || !log->ubuf || log->level & ~BPF_LOG_MASK) 9954 goto err_unlock; 9955 } 9956 9957 if (IS_ERR(btf_vmlinux)) { 9958 /* Either gcc or pahole or kernel are broken. 
*/ 9959 verbose(env, "in-kernel BTF is malformed\n"); 9960 ret = PTR_ERR(btf_vmlinux); 9961 goto skip_full_check; 9962 } 9963 9964 ret = check_attach_btf_id(env); 9965 if (ret) 9966 goto skip_full_check; 9967 9968 env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT); 9969 if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) 9970 env->strict_alignment = true; 9971 if (attr->prog_flags & BPF_F_ANY_ALIGNMENT) 9972 env->strict_alignment = false; 9973 9974 env->allow_ptr_leaks = is_priv; 9975 9976 if (is_priv) 9977 env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ; 9978 9979 ret = replace_map_fd_with_map_ptr(env); 9980 if (ret < 0) 9981 goto skip_full_check; 9982 9983 if (bpf_prog_is_dev_bound(env->prog->aux)) { 9984 ret = bpf_prog_offload_verifier_prep(env->prog); 9985 if (ret) 9986 goto skip_full_check; 9987 } 9988 9989 env->explored_states = kvcalloc(state_htab_size(env), 9990 sizeof(struct bpf_verifier_state_list *), 9991 GFP_USER); 9992 ret = -ENOMEM; 9993 if (!env->explored_states) 9994 goto skip_full_check; 9995 9996 ret = check_subprogs(env); 9997 if (ret < 0) 9998 goto skip_full_check; 9999 10000 ret = check_btf_info(env, attr, uattr); 10001 if (ret < 0) 10002 goto skip_full_check; 10003 10004 ret = check_cfg(env); 10005 if (ret < 0) 10006 goto skip_full_check; 10007 10008 ret = do_check_subprogs(env); 10009 ret = ret ?: do_check_main(env); 10010 10011 if (ret == 0 && bpf_prog_is_dev_bound(env->prog->aux)) 10012 ret = bpf_prog_offload_finalize(env); 10013 10014 skip_full_check: 10015 kvfree(env->explored_states); 10016 10017 if (ret == 0) 10018 ret = check_max_stack_depth(env); 10019 10020 /* instruction rewrites happen after this point */ 10021 if (is_priv) { 10022 if (ret == 0) 10023 opt_hard_wire_dead_code_branches(env); 10024 if (ret == 0) 10025 ret = opt_remove_dead_code(env); 10026 if (ret == 0) 10027 ret = opt_remove_nops(env); 10028 } else { 10029 if (ret == 0) 10030 sanitize_dead_code(env); 10031 } 10032 10033 if (ret == 0) 10034 /* program is valid, convert *(u32*)(ctx + off) accesses */ 10035 ret = convert_ctx_accesses(env); 10036 10037 if (ret == 0) 10038 ret = fixup_bpf_calls(env); 10039 10040 /* do 32-bit optimization after insn patching has done so those patched 10041 * insns could be handled correctly. 10042 */ 10043 if (ret == 0 && !bpf_prog_is_dev_bound(env->prog->aux)) { 10044 ret = opt_subreg_zext_lo32_rnd_hi32(env, attr); 10045 env->prog->aux->verifier_zext = bpf_jit_needs_zext() ? !ret 10046 : false; 10047 } 10048 10049 if (ret == 0) 10050 ret = fixup_call_args(env); 10051 10052 env->verification_time = ktime_get_ns() - start_time; 10053 print_verification_stats(env); 10054 10055 if (log->level && bpf_verifier_log_full(log)) 10056 ret = -ENOSPC; 10057 if (log->level && !log->ubuf) { 10058 ret = -EFAULT; 10059 goto err_release_maps; 10060 } 10061 10062 if (ret == 0 && env->used_map_cnt) { 10063 /* if program passed verifier, update used_maps in bpf_prog_info */ 10064 env->prog->aux->used_maps = kmalloc_array(env->used_map_cnt, 10065 sizeof(env->used_maps[0]), 10066 GFP_KERNEL); 10067 10068 if (!env->prog->aux->used_maps) { 10069 ret = -ENOMEM; 10070 goto err_release_maps; 10071 } 10072 10073 memcpy(env->prog->aux->used_maps, env->used_maps, 10074 sizeof(env->used_maps[0]) * env->used_map_cnt); 10075 env->prog->aux->used_map_cnt = env->used_map_cnt; 10076 10077 /* program is valid. 
Convert pseudo bpf_ld_imm64 into generic 10078 * bpf_ld_imm64 instructions 10079 */ 10080 convert_pseudo_ld_imm64(env); 10081 } 10082 10083 if (ret == 0) 10084 adjust_btf_func(env); 10085 10086 err_release_maps: 10087 if (!env->prog->aux->used_maps) 10088 /* if we didn't copy map pointers into bpf_prog_info, release 10089 * them now. Otherwise free_used_maps() will release them. 10090 */ 10091 release_maps(env); 10092 *prog = env->prog; 10093 err_unlock: 10094 if (!is_priv) 10095 mutex_unlock(&bpf_verifier_lock); 10096 vfree(env->insn_aux_data); 10097 err_free_env: 10098 kfree(env); 10099 return ret; 10100 } 10101
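/* A minimal user-space sketch (illustrative only, not part of this file;
 * insns and log_buf are assumed local variables): the log attributes
 * validated in bpf_check() above come straight from the BPF_PROG_LOAD
 * command, roughly:
 *
 *	union bpf_attr attr = {
 *		.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
 *		.insns     = (__u64)(unsigned long)insns,
 *		.insn_cnt  = insn_cnt,
 *		.license   = (__u64)(unsigned long)"GPL",
 *		.log_level = 1,
 *		.log_buf   = (__u64)(unsigned long)log_buf,
 *		.log_size  = sizeof(log_buf),
 *	};
 *	int prog_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
 *
 * log_level must be non-zero and log_size at least 128 bytes, matching
 * the sanity checks in bpf_check(); on rejection the buffer holds the
 * verbose() trace emitted by the passes above. Field names follow
 * include/uapi/linux/bpf.h.
 */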