// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */
#include <linux/bpf.h>
#include <linux/bpf_verifier.h>
#include <linux/filter.h>

#define verbose(env, fmt, args...) bpf_verifier_log_write(env, fmt, ##args)

#define BPF_COMPLEXITY_LIMIT_STATES 64

static bool is_may_goto_insn_at(struct bpf_verifier_env *env, int insn_idx)
{
	return bpf_is_may_goto_insn(&env->prog->insnsi[insn_idx]);
}

static bool is_iter_next_insn(struct bpf_verifier_env *env, int insn_idx)
{
	return env->insn_aux_data[insn_idx].is_iter_next;
}

static void update_peak_states(struct bpf_verifier_env *env)
{
	u32 cur_states;

	cur_states = env->explored_states_size + env->free_list_size + env->num_backedges;
	env->peak_states = max(env->peak_states, cur_states);
}

/* struct bpf_verifier_state->parent refers to states
 * that are in either of env->{explored_states,free_list}.
 * In both cases the state is contained in struct bpf_verifier_state_list.
 */
static struct bpf_verifier_state_list *state_parent_as_list(struct bpf_verifier_state *st)
{
	if (st->parent)
		return container_of(st->parent, struct bpf_verifier_state_list, state);
	return NULL;
}

static bool incomplete_read_marks(struct bpf_verifier_env *env,
				  struct bpf_verifier_state *st);

/* A state can be freed only if it is no longer referenced:
 * - it is in env->free_list;
 * - it has no child states;
 * - it has no incomplete read marks.
 */
static void maybe_free_verifier_state(struct bpf_verifier_env *env,
				      struct bpf_verifier_state_list *sl)
{
	if (!sl->in_free_list
	    || sl->state.branches != 0
	    || incomplete_read_marks(env, &sl->state))
		return;
	list_del(&sl->node);
	bpf_free_verifier_state(&sl->state, false);
	kfree(sl);
	env->free_list_size--;
}

/* For state @st look for a topmost frame with frame_insn_idx() in some SCC.
 * If such a frame exists, form the corresponding @callchain as an array of
 * call sites leading to this frame plus the SCC id.
 * E.g.:
 *
 *   void foo() { A: loop {... SCC#1 ...}; }
 *   void bar() { B: loop { C: foo(); ... SCC#2 ... }
 *                D: loop { E: foo(); ... SCC#3 ... } }
 *   void main() { F: bar(); }
 *
 * @callchain at (A) would be either (F,SCC#2) or (F,SCC#3) depending
 * on @st frame call sites being (F,C,A) or (F,E,A).
 */
static bool compute_scc_callchain(struct bpf_verifier_env *env,
				  struct bpf_verifier_state *st,
				  struct bpf_scc_callchain *callchain)
{
	u32 i, scc, insn_idx;

	memset(callchain, 0, sizeof(*callchain));
	for (i = 0; i <= st->curframe; i++) {
		insn_idx = bpf_frame_insn_idx(st, i);
		scc = env->insn_aux_data[insn_idx].scc;
		if (scc) {
			callchain->scc = scc;
			break;
		} else if (i < st->curframe) {
			callchain->callsites[i] = insn_idx;
		} else {
			return false;
		}
	}
	return true;
}

/* Check if a bpf_scc_visit instance for @callchain exists. */
static struct bpf_scc_visit *scc_visit_lookup(struct bpf_verifier_env *env,
					      struct bpf_scc_callchain *callchain)
{
	struct bpf_scc_info *info = env->scc_info[callchain->scc];
	struct bpf_scc_visit *visits;
	u32 i;

	if (!info)
		return NULL;
	visits = info->visits;
	for (i = 0; i < info->num_visits; i++)
		if (memcmp(callchain, &visits[i].callchain, sizeof(*callchain)) == 0)
			return &visits[i];
	return NULL;
}

/* Allocate a new bpf_scc_visit instance corresponding to @callchain.
 * Allocated instances are alive for the duration of the do_check_common()
 * call and are freed by free_states().
 */
static struct bpf_scc_visit *scc_visit_alloc(struct bpf_verifier_env *env,
					     struct bpf_scc_callchain *callchain)
{
	struct bpf_scc_visit *visit;
	struct bpf_scc_info *info;
	u32 scc, num_visits;
	u64 new_sz;

	scc = callchain->scc;
	info = env->scc_info[scc];
	num_visits = info ? info->num_visits : 0;
	new_sz = sizeof(*info) + sizeof(struct bpf_scc_visit) * (num_visits + 1);
	info = kvrealloc(env->scc_info[scc], new_sz, GFP_KERNEL_ACCOUNT);
	if (!info)
		return NULL;
	env->scc_info[scc] = info;
	info->num_visits = num_visits + 1;
	visit = &info->visits[num_visits];
	memset(visit, 0, sizeof(*visit));
	memcpy(&visit->callchain, callchain, sizeof(*callchain));
	return visit;
}

/* Form a string '(callsite#1,callsite#2,...,scc)' in env->tmp_str_buf */
static char *format_callchain(struct bpf_verifier_env *env, struct bpf_scc_callchain *callchain)
{
	char *buf = env->tmp_str_buf;
	int i, delta = 0;

	delta += snprintf(buf + delta, TMP_STR_BUF_LEN - delta, "(");
	for (i = 0; i < ARRAY_SIZE(callchain->callsites); i++) {
		if (!callchain->callsites[i])
			break;
		delta += snprintf(buf + delta, TMP_STR_BUF_LEN - delta, "%u,",
				  callchain->callsites[i]);
	}
	delta += snprintf(buf + delta, TMP_STR_BUF_LEN - delta, "%u)", callchain->scc);
	return env->tmp_str_buf;
}

/* If a callchain for @st exists (@st is in some SCC), ensure that a
 * bpf_scc_visit instance for this callchain exists, allocating it if
 * necessary. If the instance has no entry state yet, record @st as its
 * entry state.
 */
static int maybe_enter_scc(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
{
	struct bpf_scc_callchain *callchain = &env->callchain_buf;
	struct bpf_scc_visit *visit;

	if (!compute_scc_callchain(env, st, callchain))
		return 0;
	visit = scc_visit_lookup(env, callchain);
	visit = visit ?: scc_visit_alloc(env, callchain);
	if (!visit)
		return -ENOMEM;
	if (!visit->entry_state) {
		visit->entry_state = st;
		if (env->log.level & BPF_LOG_LEVEL2)
			verbose(env, "SCC enter %s\n", format_callchain(env, callchain));
	}
	return 0;
}

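/* Lifecycle summary for the helpers above and below: maybe_enter_scc()
 * records the first checkpoint state created inside an SCC (per callchain)
 * as visit->entry_state; add_scc_backedge() accumulates copies of states
 * that converged to an already explored state within the same SCC; once the
 * entry state's branch count drops to zero, maybe_exit_scc() clears
 * entry_state and flushes the accumulated backedges via
 * propagate_backedges().
 */
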
static int propagate_backedges(struct bpf_verifier_env *env, struct bpf_scc_visit *visit);

/* If a callchain for @st exists (@st is in some SCC), make the corresponding
 * bpf_scc_visit instance empty:
 * - set visit->entry_state to NULL;
 * - flush accumulated backedges.
 */
static int maybe_exit_scc(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
{
	struct bpf_scc_callchain *callchain = &env->callchain_buf;
	struct bpf_scc_visit *visit;

	if (!compute_scc_callchain(env, st, callchain))
		return 0;
	visit = scc_visit_lookup(env, callchain);
	if (!visit) {
		/*
		 * If path traversal stops inside an SCC, a corresponding bpf_scc_visit
		 * must exist for non-speculative paths. For non-speculative paths
		 * traversal stops when:
		 * a. A verification error is found; maybe_exit_scc() is not called.
		 * b. The top level BPF_EXIT is reached. The top level BPF_EXIT is not
		 *    a member of any SCC.
		 * c. A checkpoint is reached and matched. Checkpoints are created by
		 *    is_state_visited(), which calls maybe_enter_scc(), which allocates
		 *    bpf_scc_visit instances for checkpoints within SCCs.
		 * (c) is the only case that can reach this point.
		 */
		if (!st->speculative) {
			verifier_bug(env, "scc exit: no visit info for call chain %s",
				     format_callchain(env, callchain));
			return -EFAULT;
		}
		return 0;
	}
	if (visit->entry_state != st)
		return 0;
	if (env->log.level & BPF_LOG_LEVEL2)
		verbose(env, "SCC exit %s\n", format_callchain(env, callchain));
	visit->entry_state = NULL;
	env->num_backedges -= visit->num_backedges;
	visit->num_backedges = 0;
	update_peak_states(env);
	return propagate_backedges(env, visit);
}

/* Look up the bpf_scc_visit instance corresponding to the @st callchain
 * and add @backedge to visit->backedges. The @st callchain must exist.
 */
static int add_scc_backedge(struct bpf_verifier_env *env,
			    struct bpf_verifier_state *st,
			    struct bpf_scc_backedge *backedge)
{
	struct bpf_scc_callchain *callchain = &env->callchain_buf;
	struct bpf_scc_visit *visit;

	if (!compute_scc_callchain(env, st, callchain)) {
		verifier_bug(env, "add backedge: no SCC in verification path, insn_idx %d",
			     st->insn_idx);
		return -EFAULT;
	}
	visit = scc_visit_lookup(env, callchain);
	if (!visit) {
		verifier_bug(env, "add backedge: no visit info for call chain %s",
			     format_callchain(env, callchain));
		return -EFAULT;
	}
	if (env->log.level & BPF_LOG_LEVEL2)
		verbose(env, "SCC backedge %s\n", format_callchain(env, callchain));
	backedge->next = visit->backedges;
	visit->backedges = backedge;
	visit->num_backedges++;
	env->num_backedges++;
	update_peak_states(env);
	return 0;
}

/* bpf_reg_state->live marks for registers in a state @st are incomplete
 * if state @st is in some SCC and not all execution paths starting at this
 * SCC are fully explored.
 */
static bool incomplete_read_marks(struct bpf_verifier_env *env,
				  struct bpf_verifier_state *st)
{
	struct bpf_scc_callchain *callchain = &env->callchain_buf;
	struct bpf_scc_visit *visit;

	if (!compute_scc_callchain(env, st, callchain))
		return false;
	visit = scc_visit_lookup(env, callchain);
	if (!visit)
		return false;
	return !!visit->backedges;
}

int bpf_update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
{
	struct bpf_verifier_state_list *sl = NULL, *parent_sl;
	struct bpf_verifier_state *parent;
	int err;

	while (st) {
		u32 br = --st->branches;

		/* verifier_bug_if(br > 1, ...) technically makes sense here,
		 * but see comment in push_stack(), hence:
		 */
		verifier_bug_if((int)br < 0, env, "%s:branches_to_explore=%d", __func__, br);
		if (br)
			break;
		err = maybe_exit_scc(env, st);
		if (err)
			return err;
		parent = st->parent;
		parent_sl = state_parent_as_list(st);
		if (sl)
			maybe_free_verifier_state(env, sl);
		st = parent;
		sl = parent_sl;
	}
	return 0;
}

/* check %cur's range satisfies %old's */
static bool range_within(const struct bpf_reg_state *old,
			 const struct bpf_reg_state *cur)
{
	return old->umin_value <= cur->umin_value &&
	       old->umax_value >= cur->umax_value &&
	       old->smin_value <= cur->smin_value &&
	       old->smax_value >= cur->smax_value &&
	       old->u32_min_value <= cur->u32_min_value &&
	       old->u32_max_value >= cur->u32_max_value &&
	       old->s32_min_value <= cur->s32_min_value &&
	       old->s32_max_value >= cur->s32_max_value;
}

/* If in the old state two registers had the same id, then they need to have
 * the same id in the new state as well. But that id could be different from
 * the old state, so we need to track the mapping from old to new ids.
 * Once we have seen that, say, a reg with old id 5 had new id 9, any subsequent
 * regs with old id 5 must also have new id 9 for the new state to be safe. But
 * regs with a different old id could still have new id 9, we don't care about
 * that.
 * So we look through our idmap to see if this old id has been seen before. If
 * so, we require the new id to match; otherwise, we add the id pair to the map.
 */
static bool check_ids(u32 old_id, u32 cur_id, struct bpf_idmap *idmap)
{
	struct bpf_id_pair *map = idmap->map;
	unsigned int i;

	/* either both IDs should be set or both should be zero */
	if (!!old_id != !!cur_id)
		return false;

	if (old_id == 0) /* cur_id == 0 as well */
		return true;

	for (i = 0; i < idmap->cnt; i++) {
		if (map[i].old == old_id)
			return map[i].cur == cur_id;
		if (map[i].cur == cur_id)
			return false;
	}

	/* Reached the end of known mappings; haven't seen this id before */
	if (idmap->cnt < BPF_ID_MAP_SIZE) {
		map[idmap->cnt].old = old_id;
		map[idmap->cnt].cur = cur_id;
		idmap->cnt++;
		return true;
	}

	/* We ran out of idmap slots, which should be impossible */
	WARN_ON_ONCE(1);
	return false;
}

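/* Worked example for check_ids():
 *
 *   old state: r1.id == 5, r2.id == 5, r3.id == 7
 *   cur state: r1.id == 9, r2.id == 9, r3.id == 9
 *
 * - r1: idmap is empty, record 5 -> 9, ok;
 * - r2: 5 is already mapped to 9 and cur id is 9, ok;
 * - r3: old id 7 is unseen, but cur id 9 is already used as the target of
 *   the 5 -> 9 mapping, so the pair is rejected and the states are not
 *   considered equivalent.
 */
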
/*
 * Compare scalar register IDs for state equivalence.
 *
 * When old_id == 0, the old register is independent - not linked to any
 * other register. Any linking in the current state only adds constraints,
 * making it more restrictive. Since the old state didn't rely on any ID
 * relationships for this register, it's always safe to accept cur regardless
 * of its ID. Hence, return true immediately.
 *
 * When old_id != 0 but cur_id == 0, we need to ensure that different
 * independent registers in cur don't incorrectly satisfy the ID matching
 * requirements of linked registers in old.
 *
 * Example: if old has r6.id=X and r7.id=X (linked), but cur has r6.id=0
 * and r7.id=0 (both independent), without temp IDs both would map old_id=X
 * to cur_id=0 and pass. With temp IDs: r6 maps X->temp1, r7 tries to map
 * X->temp2, but X is already mapped to temp1, so the check fails correctly.
 *
 * When old_id has BPF_ADD_CONST set, the compound id (base | flag) and the
 * base id (flag stripped) must both map consistently. Example: old has
 * r2.id=A, r3.id=A|flag (r3 = r2 + delta), cur has r2.id=B, r3.id=C|flag
 * (r3 derived from unrelated r4). Without the base check, idmap gets two
 * independent entries A->B and A|flag->C|flag, missing that A->C conflicts
 * with A->B. The base ID cross-check catches this.
 */
static bool check_scalar_ids(u32 old_id, u32 cur_id, struct bpf_idmap *idmap)
{
	if (!old_id)
		return true;

	cur_id = cur_id ? cur_id : ++idmap->tmp_id_gen;

	if (!check_ids(old_id, cur_id, idmap))
		return false;
	if (old_id & BPF_ADD_CONST) {
		old_id &= ~BPF_ADD_CONST;
		cur_id &= ~BPF_ADD_CONST;
		if (!check_ids(old_id, cur_id, idmap))
			return false;
	}
	return true;
}

static void __clean_func_state(struct bpf_verifier_env *env,
			       struct bpf_func_state *st,
			       u16 live_regs, int frame)
{
	int i, j;

	for (i = 0; i < BPF_REG_FP; i++) {
		/* liveness must not touch this register anymore */
		if (!(live_regs & BIT(i)))
			/* since the register is unused, clear its state
			 * to make further comparison simpler
			 */
			bpf_mark_reg_not_init(env, &st->regs[i]);
	}

	/*
	 * Clean dead 4-byte halves within each SPI independently.
	 * half_spi 2*i   → lower half: slot_type[0..3] (closer to FP)
	 * half_spi 2*i+1 → upper half: slot_type[4..7] (farther from FP)
	 */
	for (i = 0; i < st->allocated_stack / BPF_REG_SIZE; i++) {
		bool lo_live = bpf_stack_slot_alive(env, frame, i * 2);
		bool hi_live = bpf_stack_slot_alive(env, frame, i * 2 + 1);

		if (!hi_live || !lo_live) {
			int start = !lo_live ? 0 : BPF_REG_SIZE / 2;
			int end = !hi_live ? BPF_REG_SIZE : BPF_REG_SIZE / 2;
			u8 stype = st->stack[i].slot_type[7];

			/*
			 * Don't clear special slots.
			 * destroy_if_dynptr_stack_slot() needs STACK_DYNPTR to
			 * detect overwrites and invalidate associated data slices.
			 * is_iter_reg_valid_uninit() and is_irq_flag_reg_valid_uninit()
			 * check for their respective slot types to detect double-create.
			 */
			if (stype == STACK_DYNPTR || stype == STACK_ITER ||
			    stype == STACK_IRQ_FLAG)
				continue;

			/*
			 * Only destroy spilled_ptr when the hi half is dead.
			 * If the hi half is still live with STACK_SPILL, the
			 * spilled_ptr metadata is needed for correct state
			 * comparison in stacksafe().
			 * is_spilled_reg() uses slot_type[7], while
			 * is_spilled_scalar_after() checks either slot_type[0] or [4].
			 */
			if (!hi_live) {
				struct bpf_reg_state *spill = &st->stack[i].spilled_ptr;

				if (lo_live && stype == STACK_SPILL) {
					u8 val = STACK_MISC;

					/*
					 * An 8-byte spill of scalar 0 whose hi half is dead
					 * should become STACK_ZERO in the lo 4 bytes.
					 */
					if (bpf_register_is_null(spill))
						val = STACK_ZERO;
					for (j = 0; j < 4; j++) {
						u8 *t = &st->stack[i].slot_type[j];

						if (*t == STACK_SPILL)
							*t = val;
					}
				}
				bpf_mark_reg_not_init(env, spill);
			}
			for (j = start; j < end; j++)
				st->stack[i].slot_type[j] = STACK_POISON;
		}
	}
}

static int clean_verifier_state(struct bpf_verifier_env *env,
				struct bpf_verifier_state *st)
{
	int i, err;

	err = bpf_live_stack_query_init(env, st);
	if (err)
		return err;
	for (i = 0; i <= st->curframe; i++) {
		u32 ip = bpf_frame_insn_idx(st, i);
		u16 live_regs = env->insn_aux_data[ip].live_regs_before;

		__clean_func_state(env, st->frame[i], live_regs, i);
	}
	return 0;
}

static bool regs_exact(const struct bpf_reg_state *rold,
		       const struct bpf_reg_state *rcur,
		       struct bpf_idmap *idmap)
{
	return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
	       check_ids(rold->id, rcur->id, idmap) &&
	       check_ids(rold->ref_obj_id, rcur->ref_obj_id, idmap);
}

enum exact_level {
	NOT_EXACT,
	EXACT,
	RANGE_WITHIN
};

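/* How the levels are used in this file (see bpf_is_state_visited()):
 * - NOT_EXACT is the default level for state pruning; imprecise old scalars
 *   are accepted without range comparison;
 * - RANGE_WITHIN is used when comparing states whose read and precision
 *   marks may still be incomplete (open-coded iterators, may_goto,
 *   callback-calling instructions and SCC members with pending backedges);
 *   ranges must be contained regardless of precision marks;
 * - EXACT requires a byte-for-byte register match (modulo id remapping) and
 *   identical stack slot types; it is used for infinite loop detection.
 */
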
/* Returns true if (rold safe implies rcur safe) */
static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
		    struct bpf_reg_state *rcur, struct bpf_idmap *idmap,
		    enum exact_level exact)
{
	if (exact == EXACT)
		return regs_exact(rold, rcur, idmap);

	if (rold->type == NOT_INIT)
		/* explored state can't have used this */
		return true;

	/* Enforce that register types have to match exactly, including their
	 * modifiers (like PTR_MAYBE_NULL, MEM_RDONLY, etc), as a general
	 * rule.
	 *
	 * One can make a point that using a pointer register as unbounded
	 * SCALAR would be technically acceptable, but this could lead to
	 * pointer leaks because scalars are allowed to leak while pointers
	 * are not. We could make this safe in special cases if root is
	 * calling us, but it's probably not worth the hassle.
	 *
	 * Also, register types that are *not* MAYBE_NULL could technically be
	 * safe to use as their MAYBE_NULL variants (e.g., PTR_TO_MAP_VALUE
	 * is safe to be used as PTR_TO_MAP_VALUE_OR_NULL, provided both point
	 * to the same map).
	 * However, if the old MAYBE_NULL register then got NULL checked,
	 * doing so could have affected others with the same id, and we can't
	 * check for that because we lost the id when we converted to
	 * a non-MAYBE_NULL variant.
	 * So, as a general rule we don't allow mixing MAYBE_NULL and
	 * non-MAYBE_NULL registers as well.
	 */
	if (rold->type != rcur->type)
		return false;

	switch (base_type(rold->type)) {
	case SCALAR_VALUE:
		if (env->explore_alu_limits) {
			/* explore_alu_limits disables tnum_in() and range_within()
			 * logic and requires everything to be strict
			 */
			return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
			       check_scalar_ids(rold->id, rcur->id, idmap);
		}
		if (!rold->precise && exact == NOT_EXACT)
			return true;
		/*
		 * Linked register tracking uses rold->id to detect relationships.
		 * When rold->id == 0, the register is independent and any linking
		 * in rcur only adds constraints. When rold->id != 0, we must verify
		 * id mapping and (for BPF_ADD_CONST) offset consistency.
		 *
		 * +------------------+-----------+------------------+---------------+
		 * |                  | rold->id  | rold + ADD_CONST | rold->id == 0 |
		 * |------------------+-----------+------------------+---------------|
		 * | rcur->id         | range,ids | false            | range         |
		 * | rcur + ADD_CONST | false     | range,ids,off    | range         |
		 * | rcur->id == 0    | range,ids | false            | range         |
		 * +------------------+-----------+------------------+---------------+
		 *
		 * Why check_ids() for scalar registers?
		 *
		 * Consider the following BPF code:
		 *   1: r6 = ... unbound scalar, ID=a ...
		 *   2: r7 = ... unbound scalar, ID=b ...
		 *   3: if (r6 > r7) goto +1
		 *   4: r6 = r7
		 *   5: if (r6 > X) goto ...
		 *   6: ... memory operation using r7 ...
		 *
		 * First verification path is [1-6]:
		 * - at (4) same bpf_reg_state::id (b) would be assigned to r6 and r7;
		 * - at (5) r6 would be marked <= X, sync_linked_regs() would also mark
		 *   r7 <= X, because r6 and r7 share same id.
		 * Next verification path is [1-4, 6].
		 *
		 * Instruction (6) would be reached in two states:
		 *   I.  r6{.id=b}, r7{.id=b} via path 1-6;
		 *   II. r6{.id=a}, r7{.id=b} via path 1-4, 6.
		 *
		 * Use check_ids() to distinguish these states.
		 * ---
		 * Also verify that new value satisfies old value range knowledge.
		 */

		/*
		 * ADD_CONST flags must match exactly: BPF_ADD_CONST32 and
		 * BPF_ADD_CONST64 have different linking semantics in
		 * sync_linked_regs() (alu32 zero-extends, alu64 does not),
		 * so pruning across different flag types is unsafe.
		 */
		if (rold->id &&
		    (rold->id & BPF_ADD_CONST) != (rcur->id & BPF_ADD_CONST))
			return false;

		/* Both have offset linkage: offsets must match */
		if ((rold->id & BPF_ADD_CONST) && rold->delta != rcur->delta)
			return false;

		if (!check_scalar_ids(rold->id, rcur->id, idmap))
			return false;

		return range_within(rold, rcur) && tnum_in(rold->var_off, rcur->var_off);
	case PTR_TO_MAP_KEY:
	case PTR_TO_MAP_VALUE:
	case PTR_TO_MEM:
	case PTR_TO_BUF:
	case PTR_TO_TP_BUFFER:
		/* If the new min/max/var_off satisfy the old ones and
		 * everything else matches, we are OK.
		 */
		return memcmp(rold, rcur, offsetof(struct bpf_reg_state, var_off)) == 0 &&
		       range_within(rold, rcur) &&
		       tnum_in(rold->var_off, rcur->var_off) &&
		       check_ids(rold->id, rcur->id, idmap) &&
		       check_ids(rold->ref_obj_id, rcur->ref_obj_id, idmap);
	case PTR_TO_PACKET_META:
	case PTR_TO_PACKET:
		/* We must have at least as much range as the old ptr
		 * did, so that any accesses which were safe before are
		 * still safe. This is true even if old range < old off,
		 * since someone could have accessed through (ptr - k), or
		 * even done ptr -= k in a register, to get a safe access.
		 */
		if (rold->range < 0 || rcur->range < 0) {
			/* special case for [BEYOND|AT]_PKT_END */
			if (rold->range != rcur->range)
				return false;
		} else if (rold->range > rcur->range) {
			return false;
		}
		/* id relations must be preserved */
		if (!check_ids(rold->id, rcur->id, idmap))
			return false;
		/* new val must satisfy old val knowledge */
		return range_within(rold, rcur) &&
		       tnum_in(rold->var_off, rcur->var_off);
	case PTR_TO_STACK:
		/* two stack pointers are equal only if they're pointing to
		 * the same stack frame, since fp-8 in foo != fp-8 in bar
		 */
		return regs_exact(rold, rcur, idmap) && rold->frameno == rcur->frameno;
	case PTR_TO_ARENA:
		return true;
	case PTR_TO_INSN:
		return memcmp(rold, rcur, offsetof(struct bpf_reg_state, var_off)) == 0 &&
		       range_within(rold, rcur) && tnum_in(rold->var_off, rcur->var_off);
	default:
		return regs_exact(rold, rcur, idmap);
	}
}

static struct bpf_reg_state unbound_reg;

static __init int unbound_reg_init(void)
{
	bpf_mark_reg_unknown_imprecise(&unbound_reg);
	return 0;
}
late_initcall(unbound_reg_init);

static bool is_spilled_scalar_after(const struct bpf_stack_state *stack, int im)
{
	return stack->slot_type[im] == STACK_SPILL &&
	       stack->spilled_ptr.type == SCALAR_VALUE;
}

static bool is_stack_misc_after(struct bpf_verifier_env *env,
				struct bpf_stack_state *stack, int im)
{
	u32 i;

	for (i = im; i < ARRAY_SIZE(stack->slot_type); ++i) {
		if ((stack->slot_type[i] == STACK_MISC) ||
		    ((stack->slot_type[i] == STACK_INVALID || stack->slot_type[i] == STACK_POISON) &&
		     env->allow_uninit_stack))
			continue;
		return false;
	}

	return true;
}

static struct bpf_reg_state *scalar_reg_for_stack(struct bpf_verifier_env *env,
						  struct bpf_stack_state *stack, int im)
{
	if (is_spilled_scalar_after(stack, im))
		return &stack->spilled_ptr;

	if (is_stack_misc_after(env, stack, im))
		return &unbound_reg;

	return NULL;
}

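/* Example of how the helpers above are used by stacksafe() below: if the old
 * state spilled a scalar in some slot while the current state has STACK_MISC
 * there, cur_reg becomes &unbound_reg and regsafe() accepts the pair only if
 * the old spill did not rely on precise bounds. In the opposite direction
 * (old MISC/INVALID, cur spill) old_reg is &unbound_reg, which any current
 * scalar satisfies.
 */
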
static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
		      struct bpf_func_state *cur, struct bpf_idmap *idmap,
		      enum exact_level exact)
{
	int i, spi;

	/* walk slots of the explored stack and ignore any additional
	 * slots in the current stack, since explored(safe) state
	 * didn't use them
	 */
	for (i = 0; i < old->allocated_stack; i++) {
		struct bpf_reg_state *old_reg, *cur_reg;
		int im = i % BPF_REG_SIZE;

		spi = i / BPF_REG_SIZE;

		if (exact == EXACT) {
			u8 old_type = old->stack[spi].slot_type[i % BPF_REG_SIZE];
			u8 cur_type = i < cur->allocated_stack ?
				      cur->stack[spi].slot_type[i % BPF_REG_SIZE] : STACK_INVALID;

			/* STACK_INVALID and STACK_POISON are equivalent for pruning */
			if (old_type == STACK_POISON)
				old_type = STACK_INVALID;
			if (cur_type == STACK_POISON)
				cur_type = STACK_INVALID;
			if (i >= cur->allocated_stack || old_type != cur_type)
				return false;
		}

		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID ||
		    old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_POISON)
			continue;

		if (env->allow_uninit_stack &&
		    old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC)
			continue;

		/* explored stack has more populated slots than current stack
		 * and these slots were used
		 */
		if (i >= cur->allocated_stack)
			return false;

		/*
		 * 64 and 32-bit scalar spills vs MISC/INVALID slots and vice versa.
		 * A load from a MISC/INVALID slot produces an unbound scalar.
		 * Construct a fake register for such stack and call
		 * regsafe() to ensure scalar ids are compared.
		 */
		if (im == 0 || im == 4) {
			old_reg = scalar_reg_for_stack(env, &old->stack[spi], im);
			cur_reg = scalar_reg_for_stack(env, &cur->stack[spi], im);
			if (old_reg && cur_reg) {
				if (!regsafe(env, old_reg, cur_reg, idmap, exact))
					return false;
				i += (im == 0 ? BPF_REG_SIZE - 1 : 3);
				continue;
			}
		}

		/* if old state was safe with misc data in the stack
		 * it will be safe with zero-initialized stack.
		 * The opposite is not true
		 */
		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC &&
		    cur->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_ZERO)
			continue;
		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
		    cur->stack[spi].slot_type[i % BPF_REG_SIZE])
			/* Ex: old explored (safe) state has STACK_SPILL in
			 * this stack slot, but current has STACK_MISC ->
			 * these verifier states are not equivalent;
			 * return false to continue verification of this path
			 */
			return false;
		if (i % BPF_REG_SIZE != BPF_REG_SIZE - 1)
			continue;
		/* Both old and cur have the same slot_type */
		switch (old->stack[spi].slot_type[BPF_REG_SIZE - 1]) {
		case STACK_SPILL:
			/* when explored and current stack slot are both storing
			 * spilled registers, check that stored pointers types
			 * are the same as well.
			 * Ex: explored safe path could have stored
			 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8}
			 * but current path has stored:
			 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16}
			 * such verifier states are not equivalent.
			 * return false to continue verification of this path
			 */
			if (!regsafe(env, &old->stack[spi].spilled_ptr,
				     &cur->stack[spi].spilled_ptr, idmap, exact))
				return false;
			break;
		case STACK_DYNPTR:
			old_reg = &old->stack[spi].spilled_ptr;
			cur_reg = &cur->stack[spi].spilled_ptr;
			if (old_reg->dynptr.type != cur_reg->dynptr.type ||
			    old_reg->dynptr.first_slot != cur_reg->dynptr.first_slot ||
			    !check_ids(old_reg->ref_obj_id, cur_reg->ref_obj_id, idmap))
				return false;
			break;
		case STACK_ITER:
			old_reg = &old->stack[spi].spilled_ptr;
			cur_reg = &cur->stack[spi].spilled_ptr;
			/* iter.depth is not compared between states as it
			 * doesn't matter for correctness and would otherwise
			 * prevent convergence; we maintain it only to prevent
			 * infinite loop check triggering, see
			 * iter_active_depths_differ()
			 */
			if (old_reg->iter.btf != cur_reg->iter.btf ||
			    old_reg->iter.btf_id != cur_reg->iter.btf_id ||
			    old_reg->iter.state != cur_reg->iter.state ||
			    /* ignore {old_reg,cur_reg}->iter.depth, see above */
			    !check_ids(old_reg->ref_obj_id, cur_reg->ref_obj_id, idmap))
				return false;
			break;
		case STACK_IRQ_FLAG:
			old_reg = &old->stack[spi].spilled_ptr;
			cur_reg = &cur->stack[spi].spilled_ptr;
			if (!check_ids(old_reg->ref_obj_id, cur_reg->ref_obj_id, idmap) ||
			    old_reg->irq.kfunc_class != cur_reg->irq.kfunc_class)
				return false;
			break;
		case STACK_MISC:
		case STACK_ZERO:
		case STACK_INVALID:
		case STACK_POISON:
			continue;
		/* Ensure that new unhandled slot types return false by default */
		default:
			return false;
		}
	}
	return true;
}

static bool refsafe(struct bpf_verifier_state *old, struct bpf_verifier_state *cur,
		    struct bpf_idmap *idmap)
{
	int i;

	if (old->acquired_refs != cur->acquired_refs)
		return false;

	if (old->active_locks != cur->active_locks)
		return false;

	if (old->active_preempt_locks != cur->active_preempt_locks)
		return false;

	if (old->active_rcu_locks != cur->active_rcu_locks)
		return false;

	if (!check_ids(old->active_irq_id, cur->active_irq_id, idmap))
		return false;

	if (!check_ids(old->active_lock_id, cur->active_lock_id, idmap) ||
	    old->active_lock_ptr != cur->active_lock_ptr)
		return false;

	for (i = 0; i < old->acquired_refs; i++) {
		if (!check_ids(old->refs[i].id, cur->refs[i].id, idmap) ||
		    old->refs[i].type != cur->refs[i].type)
			return false;
		switch (old->refs[i].type) {
		case REF_TYPE_PTR:
		case REF_TYPE_IRQ:
			break;
		case REF_TYPE_LOCK:
		case REF_TYPE_RES_LOCK:
		case REF_TYPE_RES_LOCK_IRQ:
			if (old->refs[i].ptr != cur->refs[i].ptr)
				return false;
			break;
		default:
			WARN_ONCE(1, "Unhandled enum type for reference state: %d\n", old->refs[i].type);
			return false;
		}
	}

	return true;
}

/* compare two verifier states
 *
 * all states stored in state_list are known to be valid, since
 * verifier reached 'bpf_exit' instruction through them
 *
 * this function is called when the verifier explores different branches of
 * execution popped from the state stack. If it sees an old state that has
 * a more strict register state and a more strict stack state, then this
 * execution branch doesn't need to be explored further, since the verifier
 * already concluded that a more strict state leads to a valid finish.
 *
 * Therefore two states are equivalent if register state is more conservative
 * and explored stack state is more conservative than the current one.
 * Example:
 *        explored                  current
 * (slot1=INV  slot2=MISC) == (slot1=MISC slot2=MISC)
 * (slot1=MISC slot2=MISC) != (slot1=INV  slot2=MISC)
 *
 * In other words, if the current stack state (the one being explored) has
 * more valid slots than the old one that already passed validation, the
 * verifier can stop exploring and conclude that the current state is valid
 * too.
 *
 * Similarly with registers. If the explored state has a register type as
 * invalid whereas the register type in the current state is meaningful, it
 * means the current state will reach the 'bpf_exit' instruction safely.
 */
static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_state *old,
			      struct bpf_func_state *cur, u32 insn_idx, enum exact_level exact)
{
	u16 live_regs = env->insn_aux_data[insn_idx].live_regs_before;
	u16 i;

	if (old->callback_depth > cur->callback_depth)
		return false;

	for (i = 0; i < MAX_BPF_REG; i++)
		if (((1 << i) & live_regs) &&
		    !regsafe(env, &old->regs[i], &cur->regs[i],
			     &env->idmap_scratch, exact))
			return false;

	if (!stacksafe(env, old, cur, &env->idmap_scratch, exact))
		return false;

	return true;
}

static void reset_idmap_scratch(struct bpf_verifier_env *env)
{
	struct bpf_idmap *idmap = &env->idmap_scratch;

	idmap->tmp_id_gen = env->id_gen;
	idmap->cnt = 0;
}

static bool states_equal(struct bpf_verifier_env *env,
			 struct bpf_verifier_state *old,
			 struct bpf_verifier_state *cur,
			 enum exact_level exact)
{
	u32 insn_idx;
	int i;

	if (old->curframe != cur->curframe)
		return false;

	reset_idmap_scratch(env);

	/* Verification state from speculative execution simulation
	 * must never prune a non-speculative execution one.
	 */
	if (old->speculative && !cur->speculative)
		return false;

	if (old->in_sleepable != cur->in_sleepable)
		return false;

	if (!refsafe(old, cur, &env->idmap_scratch))
		return false;

	/* for states to be equal callsites have to be the same
	 * and all frame states need to be equivalent
	 */
	for (i = 0; i <= old->curframe; i++) {
		insn_idx = bpf_frame_insn_idx(old, i);
		if (old->frame[i]->callsite != cur->frame[i]->callsite)
			return false;
		if (!func_states_equal(env, old->frame[i], cur->frame[i], insn_idx, exact))
			return false;
	}
	return true;
}

/* find precise scalars in the previous equivalent state and
 * propagate them into the current state
 */
static int propagate_precision(struct bpf_verifier_env *env,
			       const struct bpf_verifier_state *old,
			       struct bpf_verifier_state *cur,
			       bool *changed)
{
	struct bpf_reg_state *state_reg;
	struct bpf_func_state *state;
	int i, err = 0, fr;
	bool first;

	for (fr = old->curframe; fr >= 0; fr--) {
		state = old->frame[fr];
		state_reg = state->regs;
		first = true;
		for (i = 0; i < BPF_REG_FP; i++, state_reg++) {
			if (state_reg->type != SCALAR_VALUE ||
			    !state_reg->precise)
				continue;
			if (env->log.level & BPF_LOG_LEVEL2) {
				if (first)
					verbose(env, "frame %d: propagating r%d", fr, i);
				else
					verbose(env, ",r%d", i);
			}
			bpf_bt_set_frame_reg(&env->bt, fr, i);
			first = false;
		}

		for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
			if (!bpf_is_spilled_reg(&state->stack[i]))
				continue;
			state_reg = &state->stack[i].spilled_ptr;
			if (state_reg->type != SCALAR_VALUE ||
			    !state_reg->precise)
				continue;
			if (env->log.level & BPF_LOG_LEVEL2) {
				if (first)
					verbose(env, "frame %d: propagating fp%d",
						fr, (-i - 1) * BPF_REG_SIZE);
				else
					verbose(env, ",fp%d", (-i - 1) * BPF_REG_SIZE);
			}
			bpf_bt_set_frame_slot(&env->bt, fr, i);
			first = false;
		}
		if (!first && (env->log.level & BPF_LOG_LEVEL2))
			verbose(env, "\n");
	}

	err = bpf_mark_chain_precision(env, cur, -1, changed);
	if (err < 0)
		return err;

	return 0;
}

#define MAX_BACKEDGE_ITERS 64

/* Propagate read and precision marks from visit->backedges[*].state->equal_state
 * to corresponding parent states of visit->backedges[*].state until a fixed
 * point is reached, then free visit->backedges.
 * After execution of this function incomplete_read_marks() will return false
 * for all states corresponding to @visit->callchain.
 */
static int propagate_backedges(struct bpf_verifier_env *env, struct bpf_scc_visit *visit)
{
	struct bpf_scc_backedge *backedge;
	struct bpf_verifier_state *st;
	bool changed;
	int i, err;

	i = 0;
	do {
		if (i++ > MAX_BACKEDGE_ITERS) {
			if (env->log.level & BPF_LOG_LEVEL2)
				verbose(env, "%s: too many iterations\n", __func__);
			for (backedge = visit->backedges; backedge; backedge = backedge->next)
				bpf_mark_all_scalars_precise(env, &backedge->state);
			break;
		}
		changed = false;
		for (backedge = visit->backedges; backedge; backedge = backedge->next) {
			st = &backedge->state;
			err = propagate_precision(env, st->equal_state, st, &changed);
			if (err)
				return err;
		}
	} while (changed);

	bpf_free_backedges(visit);
	return 0;
}

static bool states_maybe_looping(struct bpf_verifier_state *old,
				 struct bpf_verifier_state *cur)
{
	struct bpf_func_state *fold, *fcur;
	int i, fr = cur->curframe;

	if (old->curframe != fr)
		return false;

	fold = old->frame[fr];
	fcur = cur->frame[fr];
	for (i = 0; i < MAX_BPF_REG; i++)
		if (memcmp(&fold->regs[i], &fcur->regs[i],
			   offsetof(struct bpf_reg_state, frameno)))
			return false;
	return true;
}

/* is_state_visited() handles iter_next() (see process_iter_next_call() for
 * terminology) calls specially: as opposed to bounded BPF loops, it *expects*
 * states to match, which otherwise would look like an infinite loop. So while
 * iter_next() calls are taken care of, we still need to be careful and
 * prevent erroneous and too eager declaration of "infinite loop" when
 * iterators are involved.
 *
 * Here's a situation in pseudo-BPF assembly form:
 *
 *   0: again:                       ; set up iter_next() call args
 *   1:   r1 = &it                   ; <CHECKPOINT HERE>
 *   2:   call bpf_iter_num_next     ; this is iter_next() call
 *   3:   if r0 == 0 goto done
 *   4:   ... something useful here ...
 *   5:   goto again                 ; another iteration
 *   6: done:
 *   7:   r1 = &it
 *   8:   call bpf_iter_num_destroy  ; clean up iter state
 *   9:   exit
 *
 * This is a typical loop. Let's assume that we have a prune point at 1:,
 * before we get to `call bpf_iter_num_next` (e.g., because of that `goto
 * again`, assuming other heuristics don't get in the way).
 *
 * When we come to 1: for the first time, let's say we have some state X. We
 * proceed to 2:, fork states, enqueue the ACTIVE one, validate the NULL case
 * successfully, and exit. Now we come back to validate that forked ACTIVE
 * state. We proceed through 3-5, come to the goto, and jump to 1:. Let's
 * assume our state didn't change, so we are converging. But the problem is
 * that we don't know that yet, as this convergence has to happen at the
 * iter_next() call site only. So if nothing is done, at 1: the verifier will
 * use bounded loop logic and declare infinite looping (and would be
 * *technically* correct, if not for iterator's "eventual sticky NULL"
 * contract, see process_iter_next_call()). But we don't want that. So what
 * we do in process_iter_next_call() when we go on another ACTIVE iteration
 * is bump slot->iter.depth, to mark that it's a different iteration. So when
 * we suspect an infinite loop, we additionally check if any of the *ACTIVE*
 * iterator states' depths differ. If yes, we pretend we are not looping and
 * wait for the next iter_next() call.
 *
 * This only applies to the ACTIVE state. In the DRAINED state we don't expect
 * to loop, because that would actually mean an infinite loop, as the DRAINED
 * state is "sticky", and so we'll keep returning into the same instruction
 * with the same state (at least in one of possible code paths).
 *
 * This approach allows keeping the infinite loop heuristic even in the face
 * of an active iterator. E.g., the C snippet below is and will be detected as
 * infinitely looping:
 *
 *   struct bpf_iter_num it;
 *   int *p, x;
 *
 *   bpf_iter_num_new(&it, 0, 10);
 *   while ((p = bpf_iter_num_next(&it))) {
 *       x = *p;
 *       while (x--) {} // <<-- infinite loop here
 *   }
 *
 */
static bool iter_active_depths_differ(struct bpf_verifier_state *old, struct bpf_verifier_state *cur)
{
	struct bpf_reg_state *slot, *cur_slot;
	struct bpf_func_state *state;
	int i, fr;

	for (fr = old->curframe; fr >= 0; fr--) {
		state = old->frame[fr];
		for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
			if (state->stack[i].slot_type[0] != STACK_ITER)
				continue;

			slot = &state->stack[i].spilled_ptr;
			if (slot->iter.state != BPF_ITER_STATE_ACTIVE)
				continue;

			cur_slot = &cur->frame[fr]->stack[i].spilled_ptr;
			if (cur_slot->iter.depth != slot->iter.depth)
				return true;
		}
	}
	return false;
}

static void mark_all_scalars_imprecise(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
{
	struct bpf_func_state *func;
	struct bpf_reg_state *reg;
	int i, j;

	for (i = 0; i <= st->curframe; i++) {
		func = st->frame[i];
		for (j = 0; j < BPF_REG_FP; j++) {
			reg = &func->regs[j];
			if (reg->type != SCALAR_VALUE)
				continue;
			reg->precise = false;
		}
		for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
			if (!bpf_is_spilled_reg(&func->stack[j]))
				continue;
			reg = &func->stack[j].spilled_ptr;
			if (reg->type != SCALAR_VALUE)
				continue;
			reg->precise = false;
		}
	}
}

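/* Decide whether the state at @insn_idx has already been explored.
 * Returns a negative error on failure, 1 when an equivalent explored state
 * exists (the current path can be pruned), and 0 when verification must
 * continue, possibly after recording the current state as a new checkpoint.
 */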
int bpf_is_state_visited(struct bpf_verifier_env *env, int insn_idx)
{
	struct bpf_verifier_state_list *new_sl;
	struct bpf_verifier_state_list *sl;
	struct bpf_verifier_state *cur = env->cur_state, *new;
	bool force_new_state, add_new_state, loop;
	int n, err, states_cnt = 0;
	struct list_head *pos, *tmp, *head;

	force_new_state = env->test_state_freq || bpf_is_force_checkpoint(env, insn_idx) ||
			  /* Avoid accumulating infinitely long jmp history */
			  cur->jmp_history_cnt > 40;

	/* bpf progs typically have pruning point every 4 instructions
	 * http://vger.kernel.org/bpfconf2019.html#session-1
	 * Do not add new state for future pruning if the verifier hasn't seen
	 * at least 2 jumps and at least 8 instructions.
	 * This heuristic helps decrease the 'total_states' and 'peak_states'
	 * metrics. In tests that amounts to up to a 50% reduction in total
	 * verifier memory consumption and a 20% verifier time speedup.
	 */
	add_new_state = force_new_state;
	if (env->jmps_processed - env->prev_jmps_processed >= 2 &&
	    env->insn_processed - env->prev_insn_processed >= 8)
		add_new_state = true;

	/* keep cleaning the current state as registers/stack become dead */
	err = clean_verifier_state(env, cur);
	if (err)
		return err;

	loop = false;
	head = bpf_explored_state(env, insn_idx);
	list_for_each_safe(pos, tmp, head) {
		sl = container_of(pos, struct bpf_verifier_state_list, node);
		states_cnt++;
		if (sl->state.insn_idx != insn_idx)
			continue;

		if (sl->state.branches) {
			struct bpf_func_state *frame = sl->state.frame[sl->state.curframe];

			if (frame->in_async_callback_fn &&
			    frame->async_entry_cnt != cur->frame[cur->curframe]->async_entry_cnt) {
				/* Different async_entry_cnt means that the verifier is
				 * processing another entry into async callback.
				 * Seeing the same state is not an indication of an infinite
				 * loop or infinite recursion.
				 * But finding the same state doesn't mean that it's safe
				 * to stop processing the current state. The previous state
				 * hasn't yet reached bpf_exit, since state.branches > 0.
				 * Checking in_async_callback_fn alone is not enough either,
				 * since the verifier still needs to catch infinite loops
				 * inside async callbacks.
				 */
				goto skip_inf_loop_check;
			}
			/* BPF open-coded iterators loop detection is special.
			 * states_maybe_looping() logic is too simplistic in detecting
			 * states that *might* be equivalent, because it doesn't know
			 * about ID remapping, so don't even perform it.
			 * See process_iter_next_call() and iter_active_depths_differ()
			 * for overview of the logic. When current and one of parent
			 * states are detected as equivalent, it's a good thing: we prove
			 * convergence and can stop simulating further iterations.
			 * It's safe to assume that iterator loop will finish, taking into
			 * account iter_next() contract of eventually returning
			 * sticky NULL result.
			 *
			 * Note that states have to be compared exactly in this case because
			 * read and precision marks might not be finalized inside the loop.
			 * E.g. as in the program below:
			 *
			 *     1. r7 = -16
			 *     2. r6 = bpf_get_prandom_u32()
			 *     3. while (bpf_iter_num_next(&fp[-8])) {
			 *     4.   if (r6 != 42) {
			 *     5.     r7 = -32
			 *     6.     r6 = bpf_get_prandom_u32()
			 *     7.     continue
			 *     8.   }
			 *     9.   r0 = r10
			 *    10.   r0 += r7
			 *    11.   r8 = *(u64 *)(r0 + 0)
			 *    12.   r6 = bpf_get_prandom_u32()
			 *    13. }
			 *
			 * Here verifier would first visit path 1-3, create a checkpoint at 3
			 * with r7=-16, continue to 4-7,3. Existing checkpoint at 3 does
			 * not have read or precision mark for r7 yet, thus inexact states
			 * comparison would discard current state with r7=-32
			 * => unsafe memory access at 11 would not be caught.
			 */
			if (is_iter_next_insn(env, insn_idx)) {
				if (states_equal(env, &sl->state, cur, RANGE_WITHIN)) {
					struct bpf_func_state *cur_frame;
					struct bpf_reg_state *iter_state, *iter_reg;
					int spi;

					cur_frame = cur->frame[cur->curframe];
					/* btf_check_iter_kfuncs() enforces that
					 * iter state pointer is always the first arg
					 */
					iter_reg = &cur_frame->regs[BPF_REG_1];
					/* current state is valid due to states_equal(),
					 * so we can assume valid iter and reg state,
					 * no need for extra (re-)validations
					 */
					spi = bpf_get_spi(iter_reg->var_off.value);
					iter_state = &bpf_func(env, iter_reg)->stack[spi].spilled_ptr;
					if (iter_state->iter.state == BPF_ITER_STATE_ACTIVE) {
						loop = true;
						goto hit;
					}
				}
				goto skip_inf_loop_check;
			}
			if (is_may_goto_insn_at(env, insn_idx)) {
				if (sl->state.may_goto_depth != cur->may_goto_depth &&
				    states_equal(env, &sl->state, cur, RANGE_WITHIN)) {
					loop = true;
					goto hit;
				}
			}
			if (bpf_calls_callback(env, insn_idx)) {
				if (states_equal(env, &sl->state, cur, RANGE_WITHIN)) {
					loop = true;
					goto hit;
				}
				goto skip_inf_loop_check;
			}
			/* attempt to detect infinite loop to avoid unnecessary doomed work */
			if (states_maybe_looping(&sl->state, cur) &&
			    states_equal(env, &sl->state, cur, EXACT) &&
			    !iter_active_depths_differ(&sl->state, cur) &&
			    sl->state.may_goto_depth == cur->may_goto_depth &&
			    sl->state.callback_unroll_depth == cur->callback_unroll_depth) {
				verbose_linfo(env, insn_idx, "; ");
				verbose(env, "infinite loop detected at insn %d\n", insn_idx);
				verbose(env, "cur state:");
				print_verifier_state(env, cur, cur->curframe, true);
				verbose(env, "old state:");
				print_verifier_state(env, &sl->state, cur->curframe, true);
				return -EINVAL;
			}
			/* if the verifier is processing a loop, avoid adding new state
			 * too often, since different loop iterations have distinct
			 * states and may not help future pruning.
			 * This threshold shouldn't be too low to make sure that
			 * a loop with large bound will be rejected quickly.
			 * The most abusive loop will be:
			 *   r1 += 1
			 *   if r1 < 1000000 goto pc-2
			 * 1M insn_processed limit / 100 == 10k peak states.
			 * This threshold shouldn't be too high either, since states
			 * at the end of the loop are likely to be useful in pruning.
			 */
skip_inf_loop_check:
			if (!force_new_state &&
			    env->jmps_processed - env->prev_jmps_processed < 20 &&
			    env->insn_processed - env->prev_insn_processed < 100)
				add_new_state = false;
			goto miss;
		}
		/* See comments for mark_all_regs_read_and_precise() */
		loop = incomplete_read_marks(env, &sl->state);
		if (states_equal(env, &sl->state, cur, loop ? RANGE_WITHIN : NOT_EXACT)) {
hit:
			sl->hit_cnt++;

			/* if previous state reached the exit with precision and
			 * current state is equivalent to it (except precision marks)
			 * the precision needs to be propagated back into
			 * the current state.
			 */
			err = 0;
			if (bpf_is_jmp_point(env, env->insn_idx))
				err = bpf_push_jmp_history(env, cur, 0, 0);
			err = err ? : propagate_precision(env, &sl->state, cur, NULL);
			if (err)
				return err;
			/* When processing iterator-based loops above, propagate_liveness and
			 * propagate_precision calls are not sufficient to transfer all relevant
			 * read and precision marks. E.g. consider the following case:
			 *
			 *  .-> A --.   Assume the states are visited in the order A, B, C.
			 *  |   |   |   Assume that state B reaches a state equivalent to state A.
			 *  |   v   v   At this point, state C is not processed yet, so state A
			 *  '-- B   C   has not received any read or precision marks from C.
			 *              Thus, marks propagated from A to B are incomplete.
			 *
			 * The verifier mitigates this by performing the following steps:
			 *
			 * - Prior to the main verification pass, strongly connected components
			 *   (SCCs) are computed over the program's control flow graph,
			 *   intraprocedurally.
			 *
			 * - During the main verification pass, `maybe_enter_scc()` checks
			 *   whether the current verifier state is entering an SCC. If so, an
			 *   instance of a `bpf_scc_visit` object is created, and the state
			 *   entering the SCC is recorded as the entry state.
			 *
			 * - This instance is associated not with the SCC itself, but with a
			 *   `bpf_scc_callchain`: a tuple consisting of the call sites leading to
			 *   the SCC and the SCC id. See `compute_scc_callchain()`.
			 *
			 * - When a verification path encounters a `states_equal(...,
			 *   RANGE_WITHIN)` condition, there exists a call chain describing the
			 *   current state and a corresponding `bpf_scc_visit` instance. A copy
			 *   of the current state is created and added to
			 *   `bpf_scc_visit->backedges`.
			 *
			 * - When a verification path terminates, `maybe_exit_scc()` is called
			 *   from `bpf_update_branch_counts()`. For states with `branches == 0`, it
			 *   checks whether the state is the entry state of any `bpf_scc_visit`
			 *   instance. If it is, this indicates that all paths originating from
			 *   this SCC visit have been explored. `propagate_backedges()` is then
			 *   called, which propagates read and precision marks through the
			 *   backedges until a fixed point is reached.
			 *   (In the earlier example, this would propagate marks from A to B,
			 *   from C to A, and then again from A to B.)
			 *
			 * A note on callchains
			 * --------------------
			 *
			 * Consider the following example:
			 *
			 *     void foo() { loop { ... SCC#1 ... } }
			 *     void main() {
			 *     A: foo();
			 *     B: ...
			 *     C: foo();
			 *     }
			 *
			 * Here, there are two distinct callchains leading to SCC#1:
			 * - (A, SCC#1)
			 * - (C, SCC#1)
			 *
			 * Each callchain identifies a separate `bpf_scc_visit` instance that
			 * accumulates backedge states. The `propagate_{liveness,precision}()`
			 * functions traverse the parent state of each backedge state, which
			 * means these parent states must remain valid (i.e., not freed) while
			 * the corresponding `bpf_scc_visit` instance exists.
			 *
			 * Associating `bpf_scc_visit` instances directly with SCCs instead of
			 * callchains would break this invariant:
			 * - States explored during `C: foo()` would contribute backedges to
			 *   SCC#1, but SCC#1 would only be exited once the exploration of
			 *   `A: foo()` completes.
			 * - By that time, the states explored between `A: foo()` and `C: foo()`
			 *   (i.e., `B: ...`) may have already been freed, causing the parent
			 *   links for states from `C: foo()` to become invalid.
			 */
			if (loop) {
				struct bpf_scc_backedge *backedge;

				backedge = kzalloc_obj(*backedge,
						       GFP_KERNEL_ACCOUNT);
				if (!backedge)
					return -ENOMEM;
				err = bpf_copy_verifier_state(&backedge->state, cur);
				backedge->state.equal_state = &sl->state;
				backedge->state.insn_idx = insn_idx;
				err = err ?: add_scc_backedge(env, &sl->state, backedge);
				if (err) {
					bpf_free_verifier_state(&backedge->state, false);
					kfree(backedge);
					return err;
				}
			}
			return 1;
		}
miss:
		/* when new state is not going to be added do not increase miss count.
		 * Otherwise several loop iterations will remove the state
		 * recorded earlier. The goal of these heuristics is to have
		 * states from some iterations of the loop (some in the beginning
		 * and some at the end) to help pruning.
		 */
		if (add_new_state)
			sl->miss_cnt++;
		/* heuristic to determine whether this state is beneficial
		 * to keep checking from state equivalence point of view.
		 * Higher numbers increase max_states_per_insn and verification time,
		 * but do not meaningfully decrease insn_processed.
		 * 'n' controls how many times state could miss before eviction.
		 * Use bigger 'n' for checkpoints because evicting checkpoint states
		 * too early would hinder iterator convergence.
		 */
		n = bpf_is_force_checkpoint(env, insn_idx) && sl->state.branches > 0 ? 64 : 3;
		if (sl->miss_cnt > sl->hit_cnt * n + n) {
			/* the state is unlikely to be useful. Remove it to
			 * speed up verification
			 */
			sl->in_free_list = true;
			list_del(&sl->node);
			list_add(&sl->node, &env->free_list);
			env->free_list_size++;
			env->explored_states_size--;
			maybe_free_verifier_state(env, sl);
		}
	}

	if (env->max_states_per_insn < states_cnt)
		env->max_states_per_insn = states_cnt;

	if (!env->bpf_capable && states_cnt > BPF_COMPLEXITY_LIMIT_STATES)
		return 0;

	if (!add_new_state)
		return 0;

	/* There were no equivalent states, remember the current one.
	 * Technically the current state is not proven to be safe yet,
	 * but it will either reach outer most bpf_exit (which means it's safe)
	 * or it will be rejected. When there are no loops the verifier won't be
	 * seeing this tuple (frame[0].callsite, frame[1].callsite, .. insn_idx)
	 * again on the way to bpf_exit.
	 * When looping the sl->state.branches will be > 0 and this state
	 * will not be considered for equivalence until branches == 0.
	 */
	new_sl = kzalloc_obj(struct bpf_verifier_state_list, GFP_KERNEL_ACCOUNT);
	if (!new_sl)
		return -ENOMEM;
	env->total_states++;
	env->explored_states_size++;
	update_peak_states(env);
	env->prev_jmps_processed = env->jmps_processed;
	env->prev_insn_processed = env->insn_processed;

	/* forget precise markings we inherited, see __mark_chain_precision */
	if (env->bpf_capable)
		mark_all_scalars_imprecise(env, cur);

	bpf_clear_singular_ids(env, cur);

	/* add new state to the head of linked list */
	new = &new_sl->state;
	err = bpf_copy_verifier_state(new, cur);
	if (err) {
		bpf_free_verifier_state(new, false);
		kfree(new_sl);
		return err;
	}
	new->insn_idx = insn_idx;
	verifier_bug_if(new->branches != 1, env,
			"%s:branches_to_explore=%d insn %d",
			__func__, new->branches, insn_idx);
	err = maybe_enter_scc(env, new);
	if (err) {
		bpf_free_verifier_state(new, false);
		kfree(new_sl);
		return err;
	}

	cur->parent = new;
	cur->first_insn_idx = insn_idx;
	cur->dfs_depth = new->dfs_depth + 1;
	bpf_clear_jmp_history(cur);
	list_add(&new_sl->node, head);
	return 0;
}