1 // SPDX-License-Identifier: GPL-2.0-only 2 /* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */ 3 #include <linux/bpf.h> 4 #include <linux/bpf_verifier.h> 5 #include <linux/cnum.h> 6 #include <linux/filter.h> 7 8 #define verbose(env, fmt, args...) bpf_verifier_log_write(env, fmt, ##args) 9 10 #define BPF_COMPLEXITY_LIMIT_STATES 64 11 12 static bool is_may_goto_insn_at(struct bpf_verifier_env *env, int insn_idx) 13 { 14 return bpf_is_may_goto_insn(&env->prog->insnsi[insn_idx]); 15 } 16 17 static bool is_iter_next_insn(struct bpf_verifier_env *env, int insn_idx) 18 { 19 return env->insn_aux_data[insn_idx].is_iter_next; 20 } 21 22 static void update_peak_states(struct bpf_verifier_env *env) 23 { 24 u32 cur_states; 25 26 cur_states = env->explored_states_size + env->free_list_size + env->num_backedges; 27 env->peak_states = max(env->peak_states, cur_states); 28 } 29 30 /* struct bpf_verifier_state->parent refers to states 31 * that are in either of env->{expored_states,free_list}. 32 * In both cases the state is contained in struct bpf_verifier_state_list. 33 */ 34 static struct bpf_verifier_state_list *state_parent_as_list(struct bpf_verifier_state *st) 35 { 36 if (st->parent) 37 return container_of(st->parent, struct bpf_verifier_state_list, state); 38 return NULL; 39 } 40 41 static bool incomplete_read_marks(struct bpf_verifier_env *env, 42 struct bpf_verifier_state *st); 43 44 /* A state can be freed if it is no longer referenced: 45 * - is in the env->free_list; 46 * - has no children states; 47 */ 48 static void maybe_free_verifier_state(struct bpf_verifier_env *env, 49 struct bpf_verifier_state_list *sl) 50 { 51 if (!sl->in_free_list 52 || sl->state.branches != 0 53 || incomplete_read_marks(env, &sl->state)) 54 return; 55 list_del(&sl->node); 56 bpf_free_verifier_state(&sl->state, false); 57 kfree(sl); 58 env->free_list_size--; 59 } 60 61 /* For state @st look for a topmost frame with frame_insn_idx() in some SCC, 62 * if such frame exists form a corresponding @callchain as an array of 63 * call sites leading to this frame and SCC id. 64 * E.g.: 65 * 66 * void foo() { A: loop {... SCC#1 ...}; } 67 * void bar() { B: loop { C: foo(); ... SCC#2 ... } 68 * D: loop { E: foo(); ... SCC#3 ... } } 69 * void main() { F: bar(); } 70 * 71 * @callchain at (A) would be either (F,SCC#2) or (F,SCC#3) depending 72 * on @st frame call sites being (F,C,A) or (F,E,A). 73 */ 74 static bool compute_scc_callchain(struct bpf_verifier_env *env, 75 struct bpf_verifier_state *st, 76 struct bpf_scc_callchain *callchain) 77 { 78 u32 i, scc, insn_idx; 79 80 memset(callchain, 0, sizeof(*callchain)); 81 for (i = 0; i <= st->curframe; i++) { 82 insn_idx = bpf_frame_insn_idx(st, i); 83 scc = env->insn_aux_data[insn_idx].scc; 84 if (scc) { 85 callchain->scc = scc; 86 break; 87 } else if (i < st->curframe) { 88 callchain->callsites[i] = insn_idx; 89 } else { 90 return false; 91 } 92 } 93 return true; 94 } 95 96 /* Check if bpf_scc_visit instance for @callchain exists. */ 97 static struct bpf_scc_visit *scc_visit_lookup(struct bpf_verifier_env *env, 98 struct bpf_scc_callchain *callchain) 99 { 100 struct bpf_scc_info *info = env->scc_info[callchain->scc]; 101 struct bpf_scc_visit *visits = info->visits; 102 u32 i; 103 104 if (!info) 105 return NULL; 106 for (i = 0; i < info->num_visits; i++) 107 if (memcmp(callchain, &visits[i].callchain, sizeof(*callchain)) == 0) 108 return &visits[i]; 109 return NULL; 110 } 111 112 /* Allocate a new bpf_scc_visit instance corresponding to @callchain. 113 * Allocated instances are alive for a duration of the do_check_common() 114 * call and are freed by free_states(). 115 */ 116 static struct bpf_scc_visit *scc_visit_alloc(struct bpf_verifier_env *env, 117 struct bpf_scc_callchain *callchain) 118 { 119 struct bpf_scc_visit *visit; 120 struct bpf_scc_info *info; 121 u32 scc, num_visits; 122 u64 new_sz; 123 124 scc = callchain->scc; 125 info = env->scc_info[scc]; 126 num_visits = info ? info->num_visits : 0; 127 new_sz = sizeof(*info) + sizeof(struct bpf_scc_visit) * (num_visits + 1); 128 info = kvrealloc(env->scc_info[scc], new_sz, GFP_KERNEL_ACCOUNT); 129 if (!info) 130 return NULL; 131 env->scc_info[scc] = info; 132 info->num_visits = num_visits + 1; 133 visit = &info->visits[num_visits]; 134 memset(visit, 0, sizeof(*visit)); 135 memcpy(&visit->callchain, callchain, sizeof(*callchain)); 136 return visit; 137 } 138 139 /* Form a string '(callsite#1,callsite#2,...,scc)' in env->tmp_str_buf */ 140 static char *format_callchain(struct bpf_verifier_env *env, struct bpf_scc_callchain *callchain) 141 { 142 char *buf = env->tmp_str_buf; 143 int i, delta = 0; 144 145 delta += snprintf(buf + delta, TMP_STR_BUF_LEN - delta, "("); 146 for (i = 0; i < ARRAY_SIZE(callchain->callsites); i++) { 147 if (!callchain->callsites[i]) 148 break; 149 delta += snprintf(buf + delta, TMP_STR_BUF_LEN - delta, "%u,", 150 callchain->callsites[i]); 151 } 152 delta += snprintf(buf + delta, TMP_STR_BUF_LEN - delta, "%u)", callchain->scc); 153 return env->tmp_str_buf; 154 } 155 156 /* If callchain for @st exists (@st is in some SCC), ensure that 157 * bpf_scc_visit instance for this callchain exists. 158 * If instance does not exist or is empty, assign visit->entry_state to @st. 159 */ 160 static int maybe_enter_scc(struct bpf_verifier_env *env, struct bpf_verifier_state *st) 161 { 162 struct bpf_scc_callchain *callchain = &env->callchain_buf; 163 struct bpf_scc_visit *visit; 164 165 if (!compute_scc_callchain(env, st, callchain)) 166 return 0; 167 visit = scc_visit_lookup(env, callchain); 168 visit = visit ?: scc_visit_alloc(env, callchain); 169 if (!visit) 170 return -ENOMEM; 171 if (!visit->entry_state) { 172 visit->entry_state = st; 173 if (env->log.level & BPF_LOG_LEVEL2) 174 verbose(env, "SCC enter %s\n", format_callchain(env, callchain)); 175 } 176 return 0; 177 } 178 179 static int propagate_backedges(struct bpf_verifier_env *env, struct bpf_scc_visit *visit); 180 181 /* If callchain for @st exists (@st is in some SCC), make it empty: 182 * - set visit->entry_state to NULL; 183 * - flush accumulated backedges. 184 */ 185 static int maybe_exit_scc(struct bpf_verifier_env *env, struct bpf_verifier_state *st) 186 { 187 struct bpf_scc_callchain *callchain = &env->callchain_buf; 188 struct bpf_scc_visit *visit; 189 190 if (!compute_scc_callchain(env, st, callchain)) 191 return 0; 192 visit = scc_visit_lookup(env, callchain); 193 if (!visit) { 194 /* 195 * If path traversal stops inside an SCC, corresponding bpf_scc_visit 196 * must exist for non-speculative paths. For non-speculative paths 197 * traversal stops when: 198 * a. Verification error is found, maybe_exit_scc() is not called. 199 * b. Top level BPF_EXIT is reached. Top level BPF_EXIT is not a member 200 * of any SCC. 201 * c. A checkpoint is reached and matched. Checkpoints are created by 202 * is_state_visited(), which calls maybe_enter_scc(), which allocates 203 * bpf_scc_visit instances for checkpoints within SCCs. 204 * (c) is the only case that can reach this point. 205 */ 206 if (!st->speculative) { 207 verifier_bug(env, "scc exit: no visit info for call chain %s", 208 format_callchain(env, callchain)); 209 return -EFAULT; 210 } 211 return 0; 212 } 213 if (visit->entry_state != st) 214 return 0; 215 if (env->log.level & BPF_LOG_LEVEL2) 216 verbose(env, "SCC exit %s\n", format_callchain(env, callchain)); 217 visit->entry_state = NULL; 218 env->num_backedges -= visit->num_backedges; 219 visit->num_backedges = 0; 220 update_peak_states(env); 221 return propagate_backedges(env, visit); 222 } 223 224 /* Lookup an bpf_scc_visit instance corresponding to @st callchain 225 * and add @backedge to visit->backedges. @st callchain must exist. 226 */ 227 static int add_scc_backedge(struct bpf_verifier_env *env, 228 struct bpf_verifier_state *st, 229 struct bpf_scc_backedge *backedge) 230 { 231 struct bpf_scc_callchain *callchain = &env->callchain_buf; 232 struct bpf_scc_visit *visit; 233 234 if (!compute_scc_callchain(env, st, callchain)) { 235 verifier_bug(env, "add backedge: no SCC in verification path, insn_idx %d", 236 st->insn_idx); 237 return -EFAULT; 238 } 239 visit = scc_visit_lookup(env, callchain); 240 if (!visit) { 241 verifier_bug(env, "add backedge: no visit info for call chain %s", 242 format_callchain(env, callchain)); 243 return -EFAULT; 244 } 245 if (env->log.level & BPF_LOG_LEVEL2) 246 verbose(env, "SCC backedge %s\n", format_callchain(env, callchain)); 247 backedge->next = visit->backedges; 248 visit->backedges = backedge; 249 visit->num_backedges++; 250 env->num_backedges++; 251 update_peak_states(env); 252 return 0; 253 } 254 255 /* bpf_reg_state->live marks for registers in a state @st are incomplete, 256 * if state @st is in some SCC and not all execution paths starting at this 257 * SCC are fully explored. 258 */ 259 static bool incomplete_read_marks(struct bpf_verifier_env *env, 260 struct bpf_verifier_state *st) 261 { 262 struct bpf_scc_callchain *callchain = &env->callchain_buf; 263 struct bpf_scc_visit *visit; 264 265 if (!compute_scc_callchain(env, st, callchain)) 266 return false; 267 visit = scc_visit_lookup(env, callchain); 268 if (!visit) 269 return false; 270 return !!visit->backedges; 271 } 272 273 int bpf_update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st) 274 { 275 struct bpf_verifier_state_list *sl = NULL, *parent_sl; 276 struct bpf_verifier_state *parent; 277 int err; 278 279 while (st) { 280 u32 br = --st->branches; 281 282 /* verifier_bug_if(br > 1, ...) technically makes sense here, 283 * but see comment in push_stack(), hence: 284 */ 285 verifier_bug_if((int)br < 0, env, "%s:branches_to_explore=%d", __func__, br); 286 if (br) 287 break; 288 err = maybe_exit_scc(env, st); 289 if (err) 290 return err; 291 parent = st->parent; 292 parent_sl = state_parent_as_list(st); 293 if (sl) 294 maybe_free_verifier_state(env, sl); 295 st = parent; 296 sl = parent_sl; 297 } 298 return 0; 299 } 300 301 /* check %cur's range satisfies %old's */ 302 static bool range_within(const struct bpf_reg_state *old, 303 const struct bpf_reg_state *cur) 304 { 305 return cnum64_is_subset(old->r64, cur->r64) && 306 cnum32_is_subset(old->r32, cur->r32); 307 } 308 309 /* If in the old state two registers had the same id, then they need to have 310 * the same id in the new state as well. But that id could be different from 311 * the old state, so we need to track the mapping from old to new ids. 312 * Once we have seen that, say, a reg with old id 5 had new id 9, any subsequent 313 * regs with old id 5 must also have new id 9 for the new state to be safe. But 314 * regs with a different old id could still have new id 9, we don't care about 315 * that. 316 * So we look through our idmap to see if this old id has been seen before. If 317 * so, we require the new id to match; otherwise, we add the id pair to the map. 318 */ 319 static bool check_ids(u32 old_id, u32 cur_id, struct bpf_idmap *idmap) 320 { 321 struct bpf_id_pair *map = idmap->map; 322 unsigned int i; 323 324 /* either both IDs should be set or both should be zero */ 325 if (!!old_id != !!cur_id) 326 return false; 327 328 if (old_id == 0) /* cur_id == 0 as well */ 329 return true; 330 331 for (i = 0; i < idmap->cnt; i++) { 332 if (map[i].old == old_id) 333 return map[i].cur == cur_id; 334 if (map[i].cur == cur_id) 335 return false; 336 } 337 338 /* Reached the end of known mappings; haven't seen this id before */ 339 if (idmap->cnt < BPF_ID_MAP_SIZE) { 340 map[idmap->cnt].old = old_id; 341 map[idmap->cnt].cur = cur_id; 342 idmap->cnt++; 343 return true; 344 } 345 346 /* We ran out of idmap slots, which should be impossible */ 347 WARN_ON_ONCE(1); 348 return false; 349 } 350 351 /* 352 * Compare scalar register IDs for state equivalence. 353 * 354 * When old_id == 0, the old register is independent - not linked to any 355 * other register. Any linking in the current state only adds constraints, 356 * making it more restrictive. Since the old state didn't rely on any ID 357 * relationships for this register, it's always safe to accept cur regardless 358 * of its ID. Hence, return true immediately. 359 * 360 * When old_id != 0 but cur_id == 0, we need to ensure that different 361 * independent registers in cur don't incorrectly satisfy the ID matching 362 * requirements of linked registers in old. 363 * 364 * Example: if old has r6.id=X and r7.id=X (linked), but cur has r6.id=0 365 * and r7.id=0 (both independent), without temp IDs both would map old_id=X 366 * to cur_id=0 and pass. With temp IDs: r6 maps X->temp1, r7 tries to map 367 * X->temp2, but X is already mapped to temp1, so the check fails correctly. 368 * 369 * When old_id has BPF_ADD_CONST set, the compound id (base | flag) and the 370 * base id (flag stripped) must both map consistently. Example: old has 371 * r2.id=A, r3.id=A|flag (r3 = r2 + delta), cur has r2.id=B, r3.id=C|flag 372 * (r3 derived from unrelated r4). Without the base check, idmap gets two 373 * independent entries A->B and A|flag->C|flag, missing that A->C conflicts 374 * with A->B. The base ID cross-check catches this. 375 */ 376 static bool check_scalar_ids(u32 old_id, u32 cur_id, struct bpf_idmap *idmap) 377 { 378 if (!old_id) 379 return true; 380 381 cur_id = cur_id ? cur_id : ++idmap->tmp_id_gen; 382 383 if (!check_ids(old_id, cur_id, idmap)) 384 return false; 385 if (old_id & BPF_ADD_CONST) { 386 old_id &= ~BPF_ADD_CONST; 387 cur_id &= ~BPF_ADD_CONST; 388 if (!check_ids(old_id, cur_id, idmap)) 389 return false; 390 } 391 return true; 392 } 393 394 static void __clean_func_state(struct bpf_verifier_env *env, 395 struct bpf_func_state *st, 396 u16 live_regs, int frame) 397 { 398 int i, j; 399 400 for (i = 0; i < BPF_REG_FP; i++) { 401 /* liveness must not touch this register anymore */ 402 if (!(live_regs & BIT(i))) 403 /* since the register is unused, clear its state 404 * to make further comparison simpler 405 */ 406 bpf_mark_reg_not_init(env, &st->regs[i]); 407 } 408 409 /* 410 * Clean dead 4-byte halves within each SPI independently. 411 * half_spi 2*i → lower half: slot_type[0..3] (closer to FP) 412 * half_spi 2*i+1 → upper half: slot_type[4..7] (farther from FP) 413 */ 414 for (i = 0; i < st->allocated_stack / BPF_REG_SIZE; i++) { 415 bool lo_live = bpf_stack_slot_alive(env, frame, i * 2); 416 bool hi_live = bpf_stack_slot_alive(env, frame, i * 2 + 1); 417 418 if (!hi_live || !lo_live) { 419 int start = !lo_live ? 0 : BPF_REG_SIZE / 2; 420 int end = !hi_live ? BPF_REG_SIZE : BPF_REG_SIZE / 2; 421 u8 stype = st->stack[i].slot_type[7]; 422 423 /* 424 * Don't clear special slots. 425 * destroy_if_dynptr_stack_slot() needs STACK_DYNPTR to 426 * detect overwrites and invalidate associated data slices. 427 * is_iter_reg_valid_uninit() and is_irq_flag_reg_valid_uninit() 428 * check for their respective slot types to detect double-create. 429 */ 430 if (stype == STACK_DYNPTR || stype == STACK_ITER || 431 stype == STACK_IRQ_FLAG) 432 continue; 433 434 /* 435 * Only destroy spilled_ptr when hi half is dead. 436 * If hi half is still live with STACK_SPILL, the 437 * spilled_ptr metadata is needed for correct state 438 * comparison in stacksafe(). 439 * is_spilled_reg() is using slot_type[7], but 440 * is_spilled_scalar_after() check either slot_type[0] or [4] 441 */ 442 if (!hi_live) { 443 struct bpf_reg_state *spill = &st->stack[i].spilled_ptr; 444 445 if (lo_live && stype == STACK_SPILL) { 446 u8 val = STACK_MISC; 447 448 /* 449 * 8 byte spill of scalar 0 where half slot is dead 450 * should become STACK_ZERO in lo 4 bytes. 451 */ 452 if (bpf_register_is_null(spill)) 453 val = STACK_ZERO; 454 for (j = 0; j < 4; j++) { 455 u8 *t = &st->stack[i].slot_type[j]; 456 457 if (*t == STACK_SPILL) 458 *t = val; 459 } 460 } 461 bpf_mark_reg_not_init(env, spill); 462 } 463 for (j = start; j < end; j++) 464 st->stack[i].slot_type[j] = STACK_POISON; 465 } 466 } 467 } 468 469 static int clean_verifier_state(struct bpf_verifier_env *env, 470 struct bpf_verifier_state *st) 471 { 472 int i, err; 473 474 err = bpf_live_stack_query_init(env, st); 475 if (err) 476 return err; 477 for (i = 0; i <= st->curframe; i++) { 478 u32 ip = bpf_frame_insn_idx(st, i); 479 u16 live_regs = env->insn_aux_data[ip].live_regs_before; 480 481 __clean_func_state(env, st->frame[i], live_regs, i); 482 } 483 return 0; 484 } 485 486 static bool regs_exact(const struct bpf_reg_state *rold, 487 const struct bpf_reg_state *rcur, 488 struct bpf_idmap *idmap) 489 { 490 return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 && 491 check_ids(rold->id, rcur->id, idmap) && 492 check_ids(rold->parent_id, rcur->parent_id, idmap); 493 } 494 495 enum exact_level { 496 NOT_EXACT, 497 EXACT, 498 RANGE_WITHIN 499 }; 500 501 /* Returns true if (rold safe implies rcur safe) */ 502 static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold, 503 struct bpf_reg_state *rcur, struct bpf_idmap *idmap, 504 enum exact_level exact) 505 { 506 if (exact == EXACT) 507 return regs_exact(rold, rcur, idmap); 508 509 if (rold->type == NOT_INIT) 510 /* explored state can't have used this */ 511 return true; 512 513 /* Enforce that register types have to match exactly, including their 514 * modifiers (like PTR_MAYBE_NULL, MEM_RDONLY, etc), as a general 515 * rule. 516 * 517 * One can make a point that using a pointer register as unbounded 518 * SCALAR would be technically acceptable, but this could lead to 519 * pointer leaks because scalars are allowed to leak while pointers 520 * are not. We could make this safe in special cases if root is 521 * calling us, but it's probably not worth the hassle. 522 * 523 * Also, register types that are *not* MAYBE_NULL could technically be 524 * safe to use as their MAYBE_NULL variants (e.g., PTR_TO_MAP_VALUE 525 * is safe to be used as PTR_TO_MAP_VALUE_OR_NULL, provided both point 526 * to the same map). 527 * However, if the old MAYBE_NULL register then got NULL checked, 528 * doing so could have affected others with the same id, and we can't 529 * check for that because we lost the id when we converted to 530 * a non-MAYBE_NULL variant. 531 * So, as a general rule we don't allow mixing MAYBE_NULL and 532 * non-MAYBE_NULL registers as well. 533 */ 534 if (rold->type != rcur->type) 535 return false; 536 537 switch (base_type(rold->type)) { 538 case SCALAR_VALUE: 539 if (env->explore_alu_limits) { 540 /* explore_alu_limits disables tnum_in() and range_within() 541 * logic and requires everything to be strict 542 */ 543 return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 && 544 check_scalar_ids(rold->id, rcur->id, idmap); 545 } 546 if (!rold->precise && exact == NOT_EXACT) 547 return true; 548 /* 549 * Linked register tracking uses rold->id to detect relationships. 550 * When rold->id == 0, the register is independent and any linking 551 * in rcur only adds constraints. When rold->id != 0, we must verify 552 * id mapping and (for BPF_ADD_CONST) offset consistency. 553 * 554 * +------------------+-----------+------------------+---------------+ 555 * | | rold->id | rold + ADD_CONST | rold->id == 0 | 556 * |------------------+-----------+------------------+---------------| 557 * | rcur->id | range,ids | false | range | 558 * | rcur + ADD_CONST | false | range,ids,off | range | 559 * | rcur->id == 0 | range,ids | false | range | 560 * +------------------+-----------+------------------+---------------+ 561 * 562 * Why check_ids() for scalar registers? 563 * 564 * Consider the following BPF code: 565 * 1: r6 = ... unbound scalar, ID=a ... 566 * 2: r7 = ... unbound scalar, ID=b ... 567 * 3: if (r6 > r7) goto +1 568 * 4: r6 = r7 569 * 5: if (r6 > X) goto ... 570 * 6: ... memory operation using r7 ... 571 * 572 * First verification path is [1-6]: 573 * - at (4) same bpf_reg_state::id (b) would be assigned to r6 and r7; 574 * - at (5) r6 would be marked <= X, sync_linked_regs() would also mark 575 * r7 <= X, because r6 and r7 share same id. 576 * Next verification path is [1-4, 6]. 577 * 578 * Instruction (6) would be reached in two states: 579 * I. r6{.id=b}, r7{.id=b} via path 1-6; 580 * II. r6{.id=a}, r7{.id=b} via path 1-4, 6. 581 * 582 * Use check_ids() to distinguish these states. 583 * --- 584 * Also verify that new value satisfies old value range knowledge. 585 */ 586 587 /* 588 * ADD_CONST flags must match exactly: BPF_ADD_CONST32 and 589 * BPF_ADD_CONST64 have different linking semantics in 590 * sync_linked_regs() (alu32 zero-extends, alu64 does not), 591 * so pruning across different flag types is unsafe. 592 */ 593 if (rold->id && 594 (rold->id & BPF_ADD_CONST) != (rcur->id & BPF_ADD_CONST)) 595 return false; 596 597 /* Both have offset linkage: offsets must match */ 598 if ((rold->id & BPF_ADD_CONST) && rold->delta != rcur->delta) 599 return false; 600 601 if (!check_scalar_ids(rold->id, rcur->id, idmap)) 602 return false; 603 604 return range_within(rold, rcur) && tnum_in(rold->var_off, rcur->var_off); 605 case PTR_TO_MAP_KEY: 606 case PTR_TO_MAP_VALUE: 607 case PTR_TO_MEM: 608 case PTR_TO_BUF: 609 case PTR_TO_TP_BUFFER: 610 /* If the new min/max/var_off satisfy the old ones and 611 * everything else matches, we are OK. 612 */ 613 return memcmp(rold, rcur, offsetof(struct bpf_reg_state, var_off)) == 0 && 614 range_within(rold, rcur) && 615 tnum_in(rold->var_off, rcur->var_off) && 616 check_ids(rold->id, rcur->id, idmap) && 617 check_ids(rold->parent_id, rcur->parent_id, idmap); 618 case PTR_TO_PACKET_META: 619 case PTR_TO_PACKET: 620 /* We must have at least as much range as the old ptr 621 * did, so that any accesses which were safe before are 622 * still safe. This is true even if old range < old off, 623 * since someone could have accessed through (ptr - k), or 624 * even done ptr -= k in a register, to get a safe access. 625 */ 626 if (rold->range < 0 || rcur->range < 0) { 627 /* special case for [BEYOND|AT]_PKT_END */ 628 if (rold->range != rcur->range) 629 return false; 630 } else if (rold->range > rcur->range) { 631 return false; 632 } 633 /* id relations must be preserved */ 634 if (!check_ids(rold->id, rcur->id, idmap)) 635 return false; 636 /* new val must satisfy old val knowledge */ 637 return range_within(rold, rcur) && 638 tnum_in(rold->var_off, rcur->var_off); 639 case PTR_TO_STACK: 640 /* two stack pointers are equal only if they're pointing to 641 * the same stack frame, since fp-8 in foo != fp-8 in bar 642 */ 643 return regs_exact(rold, rcur, idmap) && rold->frameno == rcur->frameno; 644 case PTR_TO_ARENA: 645 return true; 646 case PTR_TO_INSN: 647 return memcmp(rold, rcur, offsetof(struct bpf_reg_state, var_off)) == 0 && 648 range_within(rold, rcur) && tnum_in(rold->var_off, rcur->var_off); 649 default: 650 return regs_exact(rold, rcur, idmap); 651 } 652 } 653 654 static struct bpf_reg_state unbound_reg; 655 656 static __init int unbound_reg_init(void) 657 { 658 bpf_mark_reg_unknown_imprecise(&unbound_reg); 659 return 0; 660 } 661 late_initcall(unbound_reg_init); 662 663 static bool is_spilled_scalar_after(const struct bpf_stack_state *stack, int im) 664 { 665 return stack->slot_type[im] == STACK_SPILL && 666 stack->spilled_ptr.type == SCALAR_VALUE; 667 } 668 669 static bool is_stack_misc_after(struct bpf_verifier_env *env, 670 struct bpf_stack_state *stack, int im) 671 { 672 u32 i; 673 674 for (i = im; i < ARRAY_SIZE(stack->slot_type); ++i) { 675 if ((stack->slot_type[i] == STACK_MISC) || 676 ((stack->slot_type[i] == STACK_INVALID || stack->slot_type[i] == STACK_POISON) && 677 env->allow_uninit_stack)) 678 continue; 679 return false; 680 } 681 682 return true; 683 } 684 685 static struct bpf_reg_state *scalar_reg_for_stack(struct bpf_verifier_env *env, 686 struct bpf_stack_state *stack, int im) 687 { 688 if (is_spilled_scalar_after(stack, im)) 689 return &stack->spilled_ptr; 690 691 if (is_stack_misc_after(env, stack, im)) 692 return &unbound_reg; 693 694 return NULL; 695 } 696 697 static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old, 698 struct bpf_func_state *cur, struct bpf_idmap *idmap, 699 enum exact_level exact) 700 { 701 int i, spi; 702 703 /* walk slots of the explored stack and ignore any additional 704 * slots in the current stack, since explored(safe) state 705 * didn't use them 706 */ 707 for (i = 0; i < old->allocated_stack; i++) { 708 struct bpf_reg_state *old_reg, *cur_reg; 709 int im = i % BPF_REG_SIZE; 710 711 spi = i / BPF_REG_SIZE; 712 713 if (exact == EXACT) { 714 u8 old_type = old->stack[spi].slot_type[i % BPF_REG_SIZE]; 715 u8 cur_type = i < cur->allocated_stack ? 716 cur->stack[spi].slot_type[i % BPF_REG_SIZE] : STACK_INVALID; 717 718 /* STACK_INVALID and STACK_POISON are equivalent for pruning */ 719 if (old_type == STACK_POISON) 720 old_type = STACK_INVALID; 721 if (cur_type == STACK_POISON) 722 cur_type = STACK_INVALID; 723 if (i >= cur->allocated_stack || old_type != cur_type) 724 return false; 725 } 726 727 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID || 728 old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_POISON) 729 continue; 730 731 if (env->allow_uninit_stack && 732 old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC) 733 continue; 734 735 /* explored stack has more populated slots than current stack 736 * and these slots were used 737 */ 738 if (i >= cur->allocated_stack) 739 return false; 740 741 /* 742 * 64 and 32-bit scalar spills vs MISC/INVALID slots and vice versa. 743 * Load from MISC/INVALID slots produces unbound scalar. 744 * Construct a fake register for such stack and call 745 * regsafe() to ensure scalar ids are compared. 746 */ 747 if (im == 0 || im == 4) { 748 old_reg = scalar_reg_for_stack(env, &old->stack[spi], im); 749 cur_reg = scalar_reg_for_stack(env, &cur->stack[spi], im); 750 if (old_reg && cur_reg) { 751 if (!regsafe(env, old_reg, cur_reg, idmap, exact)) 752 return false; 753 i += (im == 0 ? BPF_REG_SIZE - 1 : 3); 754 continue; 755 } 756 } 757 758 /* if old state was safe with misc data in the stack 759 * it will be safe with zero-initialized stack. 760 * The opposite is not true 761 */ 762 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC && 763 cur->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_ZERO) 764 continue; 765 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] != 766 cur->stack[spi].slot_type[i % BPF_REG_SIZE]) 767 /* Ex: old explored (safe) state has STACK_SPILL in 768 * this stack slot, but current has STACK_MISC -> 769 * this verifier states are not equivalent, 770 * return false to continue verification of this path 771 */ 772 return false; 773 if (i % BPF_REG_SIZE != BPF_REG_SIZE - 1) 774 continue; 775 /* Both old and cur are having same slot_type */ 776 switch (old->stack[spi].slot_type[BPF_REG_SIZE - 1]) { 777 case STACK_SPILL: 778 /* when explored and current stack slot are both storing 779 * spilled registers, check that stored pointers types 780 * are the same as well. 781 * Ex: explored safe path could have stored 782 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8} 783 * but current path has stored: 784 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16} 785 * such verifier states are not equivalent. 786 * return false to continue verification of this path 787 */ 788 if (!regsafe(env, &old->stack[spi].spilled_ptr, 789 &cur->stack[spi].spilled_ptr, idmap, exact)) 790 return false; 791 break; 792 case STACK_DYNPTR: 793 old_reg = &old->stack[spi].spilled_ptr; 794 cur_reg = &cur->stack[spi].spilled_ptr; 795 if (old_reg->dynptr.type != cur_reg->dynptr.type || 796 old_reg->dynptr.first_slot != cur_reg->dynptr.first_slot || 797 !check_ids(old_reg->id, cur_reg->id, idmap) || 798 !check_ids(old_reg->parent_id, cur_reg->parent_id, idmap)) 799 return false; 800 break; 801 case STACK_ITER: 802 old_reg = &old->stack[spi].spilled_ptr; 803 cur_reg = &cur->stack[spi].spilled_ptr; 804 /* iter.depth is not compared between states as it 805 * doesn't matter for correctness and would otherwise 806 * prevent convergence; we maintain it only to prevent 807 * infinite loop check triggering, see 808 * iter_active_depths_differ() 809 */ 810 if (old_reg->iter.btf != cur_reg->iter.btf || 811 old_reg->iter.btf_id != cur_reg->iter.btf_id || 812 old_reg->iter.state != cur_reg->iter.state || 813 /* ignore {old_reg,cur_reg}->iter.depth, see above */ 814 !check_ids(old_reg->id, cur_reg->id, idmap)) 815 return false; 816 break; 817 case STACK_IRQ_FLAG: 818 old_reg = &old->stack[spi].spilled_ptr; 819 cur_reg = &cur->stack[spi].spilled_ptr; 820 if (!check_ids(old_reg->id, cur_reg->id, idmap) || 821 old_reg->irq.kfunc_class != cur_reg->irq.kfunc_class) 822 return false; 823 break; 824 case STACK_MISC: 825 case STACK_ZERO: 826 case STACK_INVALID: 827 case STACK_POISON: 828 continue; 829 /* Ensure that new unhandled slot types return false by default */ 830 default: 831 return false; 832 } 833 } 834 return true; 835 } 836 837 /* 838 * Compare stack arg slots between old and current states. 839 * Outgoing stack args are path-local state and must agree for pruning. 840 */ 841 static bool stack_arg_safe(struct bpf_verifier_env *env, struct bpf_func_state *old, 842 struct bpf_func_state *cur, struct bpf_idmap *idmap, 843 enum exact_level exact) 844 { 845 int i, nslots; 846 847 nslots = max(old->out_stack_arg_cnt, cur->out_stack_arg_cnt); 848 for (i = 0; i < nslots; i++) { 849 struct bpf_reg_state *old_arg, *cur_arg; 850 struct bpf_reg_state not_init = { .type = NOT_INIT }; 851 852 old_arg = i < old->out_stack_arg_cnt ? 853 &old->stack_arg_regs[i] : ¬_init; 854 cur_arg = i < cur->out_stack_arg_cnt ? 855 &cur->stack_arg_regs[i] : ¬_init; 856 if (!regsafe(env, old_arg, cur_arg, idmap, exact)) 857 return false; 858 } 859 860 return true; 861 } 862 863 static bool refsafe(struct bpf_verifier_state *old, struct bpf_verifier_state *cur, 864 struct bpf_idmap *idmap) 865 { 866 int i; 867 868 if (old->acquired_refs != cur->acquired_refs) 869 return false; 870 871 if (old->active_locks != cur->active_locks) 872 return false; 873 874 if (old->active_preempt_locks != cur->active_preempt_locks) 875 return false; 876 877 if (old->active_rcu_locks != cur->active_rcu_locks) 878 return false; 879 880 if (!check_ids(old->active_irq_id, cur->active_irq_id, idmap)) 881 return false; 882 883 if (!check_ids(old->active_lock_id, cur->active_lock_id, idmap) || 884 old->active_lock_ptr != cur->active_lock_ptr) 885 return false; 886 887 for (i = 0; i < old->acquired_refs; i++) { 888 if (!check_ids(old->refs[i].id, cur->refs[i].id, idmap) || 889 old->refs[i].type != cur->refs[i].type) 890 return false; 891 switch (old->refs[i].type) { 892 case REF_TYPE_PTR: 893 case REF_TYPE_IRQ: 894 break; 895 case REF_TYPE_LOCK: 896 case REF_TYPE_RES_LOCK: 897 case REF_TYPE_RES_LOCK_IRQ: 898 if (old->refs[i].ptr != cur->refs[i].ptr) 899 return false; 900 break; 901 default: 902 WARN_ONCE(1, "Unhandled enum type for reference state: %d\n", old->refs[i].type); 903 return false; 904 } 905 } 906 907 return true; 908 } 909 910 /* compare two verifier states 911 * 912 * all states stored in state_list are known to be valid, since 913 * verifier reached 'bpf_exit' instruction through them 914 * 915 * this function is called when verifier exploring different branches of 916 * execution popped from the state stack. If it sees an old state that has 917 * more strict register state and more strict stack state then this execution 918 * branch doesn't need to be explored further, since verifier already 919 * concluded that more strict state leads to valid finish. 920 * 921 * Therefore two states are equivalent if register state is more conservative 922 * and explored stack state is more conservative than the current one. 923 * Example: 924 * explored current 925 * (slot1=INV slot2=MISC) == (slot1=MISC slot2=MISC) 926 * (slot1=MISC slot2=MISC) != (slot1=INV slot2=MISC) 927 * 928 * In other words if current stack state (one being explored) has more 929 * valid slots than old one that already passed validation, it means 930 * the verifier can stop exploring and conclude that current state is valid too 931 * 932 * Similarly with registers. If explored state has register type as invalid 933 * whereas register type in current state is meaningful, it means that 934 * the current state will reach 'bpf_exit' instruction safely 935 */ 936 static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_state *old, 937 struct bpf_func_state *cur, u32 insn_idx, enum exact_level exact) 938 { 939 u16 live_regs = env->insn_aux_data[insn_idx].live_regs_before; 940 u16 i; 941 942 if (old->callback_depth > cur->callback_depth) 943 return false; 944 945 if (!old->no_stack_arg_load && cur->no_stack_arg_load) 946 return false; 947 948 for (i = 0; i < MAX_BPF_REG; i++) 949 if (((1 << i) & live_regs) && 950 !regsafe(env, &old->regs[i], &cur->regs[i], 951 &env->idmap_scratch, exact)) 952 return false; 953 954 if (!stacksafe(env, old, cur, &env->idmap_scratch, exact)) 955 return false; 956 957 if (!stack_arg_safe(env, old, cur, &env->idmap_scratch, exact)) 958 return false; 959 960 return true; 961 } 962 963 static void reset_idmap_scratch(struct bpf_verifier_env *env) 964 { 965 struct bpf_idmap *idmap = &env->idmap_scratch; 966 967 idmap->tmp_id_gen = env->id_gen; 968 idmap->cnt = 0; 969 } 970 971 static bool states_equal(struct bpf_verifier_env *env, 972 struct bpf_verifier_state *old, 973 struct bpf_verifier_state *cur, 974 enum exact_level exact) 975 { 976 u32 insn_idx; 977 int i; 978 979 if (old->curframe != cur->curframe) 980 return false; 981 982 reset_idmap_scratch(env); 983 984 /* Verification state from speculative execution simulation 985 * must never prune a non-speculative execution one. 986 */ 987 if (old->speculative && !cur->speculative) 988 return false; 989 990 if (old->in_sleepable != cur->in_sleepable) 991 return false; 992 993 if (!refsafe(old, cur, &env->idmap_scratch)) 994 return false; 995 996 /* for states to be equal callsites have to be the same 997 * and all frame states need to be equivalent 998 */ 999 for (i = 0; i <= old->curframe; i++) { 1000 insn_idx = bpf_frame_insn_idx(old, i); 1001 if (old->frame[i]->callsite != cur->frame[i]->callsite) 1002 return false; 1003 if (!func_states_equal(env, old->frame[i], cur->frame[i], insn_idx, exact)) 1004 return false; 1005 } 1006 return true; 1007 } 1008 1009 /* find precise scalars in the previous equivalent state and 1010 * propagate them into the current state 1011 */ 1012 static int propagate_precision(struct bpf_verifier_env *env, 1013 const struct bpf_verifier_state *old, 1014 struct bpf_verifier_state *cur, 1015 bool *changed) 1016 { 1017 struct bpf_reg_state *state_reg; 1018 struct bpf_func_state *state; 1019 int i, err = 0, fr; 1020 bool first; 1021 1022 for (fr = old->curframe; fr >= 0; fr--) { 1023 state = old->frame[fr]; 1024 state_reg = state->regs; 1025 first = true; 1026 for (i = 0; i < BPF_REG_FP; i++, state_reg++) { 1027 if (state_reg->type != SCALAR_VALUE || 1028 !state_reg->precise) 1029 continue; 1030 if (env->log.level & BPF_LOG_LEVEL2) { 1031 if (first) 1032 verbose(env, "frame %d: propagating r%d", fr, i); 1033 else 1034 verbose(env, ",r%d", i); 1035 } 1036 bpf_bt_set_frame_reg(&env->bt, fr, i); 1037 first = false; 1038 } 1039 1040 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) { 1041 if (!bpf_is_spilled_reg(&state->stack[i])) 1042 continue; 1043 state_reg = &state->stack[i].spilled_ptr; 1044 if (state_reg->type != SCALAR_VALUE || 1045 !state_reg->precise) 1046 continue; 1047 if (env->log.level & BPF_LOG_LEVEL2) { 1048 if (first) 1049 verbose(env, "frame %d: propagating fp%d", 1050 fr, (-i - 1) * BPF_REG_SIZE); 1051 else 1052 verbose(env, ",fp%d", (-i - 1) * BPF_REG_SIZE); 1053 } 1054 bpf_bt_set_frame_slot(&env->bt, fr, i); 1055 first = false; 1056 } 1057 if (!first && (env->log.level & BPF_LOG_LEVEL2)) 1058 verbose(env, "\n"); 1059 } 1060 1061 err = bpf_mark_chain_precision(env, cur, -1, changed); 1062 if (err < 0) 1063 return err; 1064 1065 return 0; 1066 } 1067 1068 #define MAX_BACKEDGE_ITERS 64 1069 1070 /* Propagate read and precision marks from visit->backedges[*].state->equal_state 1071 * to corresponding parent states of visit->backedges[*].state until fixed point is reached, 1072 * then free visit->backedges. 1073 * After execution of this function incomplete_read_marks() will return false 1074 * for all states corresponding to @visit->callchain. 1075 */ 1076 static int propagate_backedges(struct bpf_verifier_env *env, struct bpf_scc_visit *visit) 1077 { 1078 struct bpf_scc_backedge *backedge; 1079 struct bpf_verifier_state *st; 1080 bool changed; 1081 int i, err; 1082 1083 i = 0; 1084 do { 1085 if (i++ > MAX_BACKEDGE_ITERS) { 1086 if (env->log.level & BPF_LOG_LEVEL2) 1087 verbose(env, "%s: too many iterations\n", __func__); 1088 for (backedge = visit->backedges; backedge; backedge = backedge->next) 1089 bpf_mark_all_scalars_precise(env, &backedge->state); 1090 break; 1091 } 1092 changed = false; 1093 for (backedge = visit->backedges; backedge; backedge = backedge->next) { 1094 st = &backedge->state; 1095 err = propagate_precision(env, st->equal_state, st, &changed); 1096 if (err) 1097 return err; 1098 } 1099 } while (changed); 1100 1101 bpf_free_backedges(visit); 1102 return 0; 1103 } 1104 1105 static bool states_maybe_looping(struct bpf_verifier_state *old, 1106 struct bpf_verifier_state *cur) 1107 { 1108 struct bpf_func_state *fold, *fcur; 1109 int i, fr = cur->curframe; 1110 1111 if (old->curframe != fr) 1112 return false; 1113 1114 fold = old->frame[fr]; 1115 fcur = cur->frame[fr]; 1116 for (i = 0; i < MAX_BPF_REG; i++) 1117 if (memcmp(&fold->regs[i], &fcur->regs[i], 1118 offsetof(struct bpf_reg_state, frameno))) 1119 return false; 1120 return true; 1121 } 1122 1123 /* is_state_visited() handles iter_next() (see process_iter_next_call() for 1124 * terminology) calls specially: as opposed to bounded BPF loops, it *expects* 1125 * states to match, which otherwise would look like an infinite loop. So while 1126 * iter_next() calls are taken care of, we still need to be careful and 1127 * prevent erroneous and too eager declaration of "infinite loop", when 1128 * iterators are involved. 1129 * 1130 * Here's a situation in pseudo-BPF assembly form: 1131 * 1132 * 0: again: ; set up iter_next() call args 1133 * 1: r1 = &it ; <CHECKPOINT HERE> 1134 * 2: call bpf_iter_num_next ; this is iter_next() call 1135 * 3: if r0 == 0 goto done 1136 * 4: ... something useful here ... 1137 * 5: goto again ; another iteration 1138 * 6: done: 1139 * 7: r1 = &it 1140 * 8: call bpf_iter_num_destroy ; clean up iter state 1141 * 9: exit 1142 * 1143 * This is a typical loop. Let's assume that we have a prune point at 1:, 1144 * before we get to `call bpf_iter_num_next` (e.g., because of that `goto 1145 * again`, assuming other heuristics don't get in a way). 1146 * 1147 * When we first time come to 1:, let's say we have some state X. We proceed 1148 * to 2:, fork states, enqueue ACTIVE, validate NULL case successfully, exit. 1149 * Now we come back to validate that forked ACTIVE state. We proceed through 1150 * 3-5, come to goto, jump to 1:. Let's assume our state didn't change, so we 1151 * are converging. But the problem is that we don't know that yet, as this 1152 * convergence has to happen at iter_next() call site only. So if nothing is 1153 * done, at 1: verifier will use bounded loop logic and declare infinite 1154 * looping (and would be *technically* correct, if not for iterator's 1155 * "eventual sticky NULL" contract, see process_iter_next_call()). But we 1156 * don't want that. So what we do in process_iter_next_call() when we go on 1157 * another ACTIVE iteration, we bump slot->iter.depth, to mark that it's 1158 * a different iteration. So when we suspect an infinite loop, we additionally 1159 * check if any of the *ACTIVE* iterator states depths differ. If yes, we 1160 * pretend we are not looping and wait for next iter_next() call. 1161 * 1162 * This only applies to ACTIVE state. In DRAINED state we don't expect to 1163 * loop, because that would actually mean infinite loop, as DRAINED state is 1164 * "sticky", and so we'll keep returning into the same instruction with the 1165 * same state (at least in one of possible code paths). 1166 * 1167 * This approach allows to keep infinite loop heuristic even in the face of 1168 * active iterator. E.g., C snippet below is and will be detected as 1169 * infinitely looping: 1170 * 1171 * struct bpf_iter_num it; 1172 * int *p, x; 1173 * 1174 * bpf_iter_num_new(&it, 0, 10); 1175 * while ((p = bpf_iter_num_next(&t))) { 1176 * x = p; 1177 * while (x--) {} // <<-- infinite loop here 1178 * } 1179 * 1180 */ 1181 static bool iter_active_depths_differ(struct bpf_verifier_state *old, struct bpf_verifier_state *cur) 1182 { 1183 struct bpf_reg_state *slot, *cur_slot; 1184 struct bpf_func_state *state; 1185 int i, fr; 1186 1187 for (fr = old->curframe; fr >= 0; fr--) { 1188 state = old->frame[fr]; 1189 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) { 1190 if (state->stack[i].slot_type[0] != STACK_ITER) 1191 continue; 1192 1193 slot = &state->stack[i].spilled_ptr; 1194 if (slot->iter.state != BPF_ITER_STATE_ACTIVE) 1195 continue; 1196 1197 cur_slot = &cur->frame[fr]->stack[i].spilled_ptr; 1198 if (cur_slot->iter.depth != slot->iter.depth) 1199 return true; 1200 } 1201 } 1202 return false; 1203 } 1204 1205 static void mark_all_scalars_imprecise(struct bpf_verifier_env *env, struct bpf_verifier_state *st) 1206 { 1207 struct bpf_func_state *func; 1208 struct bpf_reg_state *reg; 1209 int i, j; 1210 1211 for (i = 0; i <= st->curframe; i++) { 1212 func = st->frame[i]; 1213 for (j = 0; j < BPF_REG_FP; j++) { 1214 reg = &func->regs[j]; 1215 if (reg->type != SCALAR_VALUE) 1216 continue; 1217 reg->precise = false; 1218 } 1219 for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) { 1220 if (!bpf_is_spilled_reg(&func->stack[j])) 1221 continue; 1222 reg = &func->stack[j].spilled_ptr; 1223 if (reg->type != SCALAR_VALUE) 1224 continue; 1225 reg->precise = false; 1226 } 1227 } 1228 } 1229 1230 int bpf_is_state_visited(struct bpf_verifier_env *env, int insn_idx) 1231 { 1232 struct bpf_verifier_state_list *new_sl; 1233 struct bpf_verifier_state_list *sl; 1234 struct bpf_verifier_state *cur = env->cur_state, *new; 1235 bool force_new_state, add_new_state, loop; 1236 int n, err, states_cnt = 0; 1237 struct list_head *pos, *tmp, *head; 1238 1239 force_new_state = env->test_state_freq || bpf_is_force_checkpoint(env, insn_idx) || 1240 /* Avoid accumulating infinitely long jmp history */ 1241 cur->jmp_history_cnt > 40; 1242 1243 /* bpf progs typically have pruning point every 4 instructions 1244 * http://vger.kernel.org/bpfconf2019.html#session-1 1245 * Do not add new state for future pruning if the verifier hasn't seen 1246 * at least 2 jumps and at least 8 instructions. 1247 * This heuristics helps decrease 'total_states' and 'peak_states' metric. 1248 * In tests that amounts to up to 50% reduction into total verifier 1249 * memory consumption and 20% verifier time speedup. 1250 */ 1251 add_new_state = force_new_state; 1252 if (env->jmps_processed - env->prev_jmps_processed >= 2 && 1253 env->insn_processed - env->prev_insn_processed >= 8) 1254 add_new_state = true; 1255 1256 /* keep cleaning the current state as registers/stack become dead */ 1257 err = clean_verifier_state(env, cur); 1258 if (err) 1259 return err; 1260 1261 loop = false; 1262 head = bpf_explored_state(env, insn_idx); 1263 list_for_each_safe(pos, tmp, head) { 1264 sl = container_of(pos, struct bpf_verifier_state_list, node); 1265 states_cnt++; 1266 if (sl->state.insn_idx != insn_idx) 1267 continue; 1268 1269 if (sl->state.branches) { 1270 struct bpf_func_state *frame = sl->state.frame[sl->state.curframe]; 1271 1272 if (frame->in_async_callback_fn && 1273 frame->async_entry_cnt != cur->frame[cur->curframe]->async_entry_cnt) { 1274 /* Different async_entry_cnt means that the verifier is 1275 * processing another entry into async callback. 1276 * Seeing the same state is not an indication of infinite 1277 * loop or infinite recursion. 1278 * But finding the same state doesn't mean that it's safe 1279 * to stop processing the current state. The previous state 1280 * hasn't yet reached bpf_exit, since state.branches > 0. 1281 * Checking in_async_callback_fn alone is not enough either. 1282 * Since the verifier still needs to catch infinite loops 1283 * inside async callbacks. 1284 */ 1285 goto skip_inf_loop_check; 1286 } 1287 /* BPF open-coded iterators loop detection is special. 1288 * states_maybe_looping() logic is too simplistic in detecting 1289 * states that *might* be equivalent, because it doesn't know 1290 * about ID remapping, so don't even perform it. 1291 * See process_iter_next_call() and iter_active_depths_differ() 1292 * for overview of the logic. When current and one of parent 1293 * states are detected as equivalent, it's a good thing: we prove 1294 * convergence and can stop simulating further iterations. 1295 * It's safe to assume that iterator loop will finish, taking into 1296 * account iter_next() contract of eventually returning 1297 * sticky NULL result. 1298 * 1299 * Note, that states have to be compared exactly in this case because 1300 * read and precision marks might not be finalized inside the loop. 1301 * E.g. as in the program below: 1302 * 1303 * 1. r7 = -16 1304 * 2. r6 = bpf_get_prandom_u32() 1305 * 3. while (bpf_iter_num_next(&fp[-8])) { 1306 * 4. if (r6 != 42) { 1307 * 5. r7 = -32 1308 * 6. r6 = bpf_get_prandom_u32() 1309 * 7. continue 1310 * 8. } 1311 * 9. r0 = r10 1312 * 10. r0 += r7 1313 * 11. r8 = *(u64 *)(r0 + 0) 1314 * 12. r6 = bpf_get_prandom_u32() 1315 * 13. } 1316 * 1317 * Here verifier would first visit path 1-3, create a checkpoint at 3 1318 * with r7=-16, continue to 4-7,3. Existing checkpoint at 3 does 1319 * not have read or precision mark for r7 yet, thus inexact states 1320 * comparison would discard current state with r7=-32 1321 * => unsafe memory access at 11 would not be caught. 1322 */ 1323 if (is_iter_next_insn(env, insn_idx)) { 1324 if (states_equal(env, &sl->state, cur, RANGE_WITHIN)) { 1325 struct bpf_func_state *cur_frame; 1326 struct bpf_reg_state *iter_state, *iter_reg; 1327 int spi; 1328 1329 cur_frame = cur->frame[cur->curframe]; 1330 /* btf_check_iter_kfuncs() enforces that 1331 * iter state pointer is always the first arg 1332 */ 1333 iter_reg = &cur_frame->regs[BPF_REG_1]; 1334 /* current state is valid due to states_equal(), 1335 * so we can assume valid iter and reg state, 1336 * no need for extra (re-)validations 1337 */ 1338 spi = bpf_get_spi(iter_reg->var_off.value); 1339 iter_state = &bpf_func(env, iter_reg)->stack[spi].spilled_ptr; 1340 if (iter_state->iter.state == BPF_ITER_STATE_ACTIVE) { 1341 loop = true; 1342 goto hit; 1343 } 1344 } 1345 goto skip_inf_loop_check; 1346 } 1347 if (is_may_goto_insn_at(env, insn_idx)) { 1348 if (sl->state.may_goto_depth != cur->may_goto_depth && 1349 states_equal(env, &sl->state, cur, RANGE_WITHIN)) { 1350 loop = true; 1351 goto hit; 1352 } 1353 } 1354 if (bpf_calls_callback(env, insn_idx)) { 1355 if (states_equal(env, &sl->state, cur, RANGE_WITHIN)) { 1356 loop = true; 1357 goto hit; 1358 } 1359 goto skip_inf_loop_check; 1360 } 1361 /* attempt to detect infinite loop to avoid unnecessary doomed work */ 1362 if (states_maybe_looping(&sl->state, cur) && 1363 states_equal(env, &sl->state, cur, EXACT) && 1364 !iter_active_depths_differ(&sl->state, cur) && 1365 sl->state.may_goto_depth == cur->may_goto_depth && 1366 sl->state.callback_unroll_depth == cur->callback_unroll_depth) { 1367 verbose_linfo(env, insn_idx, "; "); 1368 verbose(env, "infinite loop detected at insn %d\n", insn_idx); 1369 verbose(env, "cur state:"); 1370 print_verifier_state(env, cur, cur->curframe, true); 1371 verbose(env, "old state:"); 1372 print_verifier_state(env, &sl->state, cur->curframe, true); 1373 return -EINVAL; 1374 } 1375 /* if the verifier is processing a loop, avoid adding new state 1376 * too often, since different loop iterations have distinct 1377 * states and may not help future pruning. 1378 * This threshold shouldn't be too low to make sure that 1379 * a loop with large bound will be rejected quickly. 1380 * The most abusive loop will be: 1381 * r1 += 1 1382 * if r1 < 1000000 goto pc-2 1383 * 1M insn_procssed limit / 100 == 10k peak states. 1384 * This threshold shouldn't be too high either, since states 1385 * at the end of the loop are likely to be useful in pruning. 1386 */ 1387 skip_inf_loop_check: 1388 if (!force_new_state && 1389 env->jmps_processed - env->prev_jmps_processed < 20 && 1390 env->insn_processed - env->prev_insn_processed < 100) 1391 add_new_state = false; 1392 goto miss; 1393 } 1394 /* See comments for mark_all_regs_read_and_precise() */ 1395 loop = incomplete_read_marks(env, &sl->state); 1396 if (states_equal(env, &sl->state, cur, loop ? RANGE_WITHIN : NOT_EXACT)) { 1397 hit: 1398 sl->hit_cnt++; 1399 1400 /* if previous state reached the exit with precision and 1401 * current state is equivalent to it (except precision marks) 1402 * the precision needs to be propagated back in 1403 * the current state. 1404 */ 1405 err = 0; 1406 if (bpf_is_jmp_point(env, env->insn_idx)) 1407 err = bpf_push_jmp_history(env, cur, 0, 0, 0, 0); 1408 err = err ? : propagate_precision(env, &sl->state, cur, NULL); 1409 if (err) 1410 return err; 1411 /* When processing iterator based loops above propagate_liveness and 1412 * propagate_precision calls are not sufficient to transfer all relevant 1413 * read and precision marks. E.g. consider the following case: 1414 * 1415 * .-> A --. Assume the states are visited in the order A, B, C. 1416 * | | | Assume that state B reaches a state equivalent to state A. 1417 * | v v At this point, state C is not processed yet, so state A 1418 * '-- B C has not received any read or precision marks from C. 1419 * Thus, marks propagated from A to B are incomplete. 1420 * 1421 * The verifier mitigates this by performing the following steps: 1422 * 1423 * - Prior to the main verification pass, strongly connected components 1424 * (SCCs) are computed over the program's control flow graph, 1425 * intraprocedurally. 1426 * 1427 * - During the main verification pass, `maybe_enter_scc()` checks 1428 * whether the current verifier state is entering an SCC. If so, an 1429 * instance of a `bpf_scc_visit` object is created, and the state 1430 * entering the SCC is recorded as the entry state. 1431 * 1432 * - This instance is associated not with the SCC itself, but with a 1433 * `bpf_scc_callchain`: a tuple consisting of the call sites leading to 1434 * the SCC and the SCC id. See `compute_scc_callchain()`. 1435 * 1436 * - When a verification path encounters a `states_equal(..., 1437 * RANGE_WITHIN)` condition, there exists a call chain describing the 1438 * current state and a corresponding `bpf_scc_visit` instance. A copy 1439 * of the current state is created and added to 1440 * `bpf_scc_visit->backedges`. 1441 * 1442 * - When a verification path terminates, `maybe_exit_scc()` is called 1443 * from `bpf_update_branch_counts()`. For states with `branches == 0`, it 1444 * checks whether the state is the entry state of any `bpf_scc_visit` 1445 * instance. If it is, this indicates that all paths originating from 1446 * this SCC visit have been explored. `propagate_backedges()` is then 1447 * called, which propagates read and precision marks through the 1448 * backedges until a fixed point is reached. 1449 * (In the earlier example, this would propagate marks from A to B, 1450 * from C to A, and then again from A to B.) 1451 * 1452 * A note on callchains 1453 * -------------------- 1454 * 1455 * Consider the following example: 1456 * 1457 * void foo() { loop { ... SCC#1 ... } } 1458 * void main() { 1459 * A: foo(); 1460 * B: ... 1461 * C: foo(); 1462 * } 1463 * 1464 * Here, there are two distinct callchains leading to SCC#1: 1465 * - (A, SCC#1) 1466 * - (C, SCC#1) 1467 * 1468 * Each callchain identifies a separate `bpf_scc_visit` instance that 1469 * accumulates backedge states. The `propagate_{liveness,precision}()` 1470 * functions traverse the parent state of each backedge state, which 1471 * means these parent states must remain valid (i.e., not freed) while 1472 * the corresponding `bpf_scc_visit` instance exists. 1473 * 1474 * Associating `bpf_scc_visit` instances directly with SCCs instead of 1475 * callchains would break this invariant: 1476 * - States explored during `C: foo()` would contribute backedges to 1477 * SCC#1, but SCC#1 would only be exited once the exploration of 1478 * `A: foo()` completes. 1479 * - By that time, the states explored between `A: foo()` and `C: foo()` 1480 * (i.e., `B: ...`) may have already been freed, causing the parent 1481 * links for states from `C: foo()` to become invalid. 1482 */ 1483 if (loop) { 1484 struct bpf_scc_backedge *backedge; 1485 1486 backedge = kzalloc_obj(*backedge, 1487 GFP_KERNEL_ACCOUNT); 1488 if (!backedge) 1489 return -ENOMEM; 1490 err = bpf_copy_verifier_state(&backedge->state, cur); 1491 backedge->state.equal_state = &sl->state; 1492 backedge->state.insn_idx = insn_idx; 1493 err = err ?: add_scc_backedge(env, &sl->state, backedge); 1494 if (err) { 1495 bpf_free_verifier_state(&backedge->state, false); 1496 kfree(backedge); 1497 return err; 1498 } 1499 } 1500 return 1; 1501 } 1502 miss: 1503 /* when new state is not going to be added do not increase miss count. 1504 * Otherwise several loop iterations will remove the state 1505 * recorded earlier. The goal of these heuristics is to have 1506 * states from some iterations of the loop (some in the beginning 1507 * and some at the end) to help pruning. 1508 */ 1509 if (add_new_state) 1510 sl->miss_cnt++; 1511 /* heuristic to determine whether this state is beneficial 1512 * to keep checking from state equivalence point of view. 1513 * Higher numbers increase max_states_per_insn and verification time, 1514 * but do not meaningfully decrease insn_processed. 1515 * 'n' controls how many times state could miss before eviction. 1516 * Use bigger 'n' for checkpoints because evicting checkpoint states 1517 * too early would hinder iterator convergence. 1518 */ 1519 n = bpf_is_force_checkpoint(env, insn_idx) && sl->state.branches > 0 ? 64 : 3; 1520 if (sl->miss_cnt > sl->hit_cnt * n + n) { 1521 /* the state is unlikely to be useful. Remove it to 1522 * speed up verification 1523 */ 1524 sl->in_free_list = true; 1525 list_del(&sl->node); 1526 list_add(&sl->node, &env->free_list); 1527 env->free_list_size++; 1528 env->explored_states_size--; 1529 maybe_free_verifier_state(env, sl); 1530 } 1531 } 1532 1533 if (env->max_states_per_insn < states_cnt) 1534 env->max_states_per_insn = states_cnt; 1535 1536 if (!env->bpf_capable && states_cnt > BPF_COMPLEXITY_LIMIT_STATES) 1537 return 0; 1538 1539 if (!add_new_state) 1540 return 0; 1541 1542 /* There were no equivalent states, remember the current one. 1543 * Technically the current state is not proven to be safe yet, 1544 * but it will either reach outer most bpf_exit (which means it's safe) 1545 * or it will be rejected. When there are no loops the verifier won't be 1546 * seeing this tuple (frame[0].callsite, frame[1].callsite, .. insn_idx) 1547 * again on the way to bpf_exit. 1548 * When looping the sl->state.branches will be > 0 and this state 1549 * will not be considered for equivalence until branches == 0. 1550 */ 1551 new_sl = kzalloc_obj(struct bpf_verifier_state_list, GFP_KERNEL_ACCOUNT); 1552 if (!new_sl) 1553 return -ENOMEM; 1554 env->total_states++; 1555 env->explored_states_size++; 1556 update_peak_states(env); 1557 env->prev_jmps_processed = env->jmps_processed; 1558 env->prev_insn_processed = env->insn_processed; 1559 1560 /* forget precise markings we inherited, see __mark_chain_precision */ 1561 if (env->bpf_capable) 1562 mark_all_scalars_imprecise(env, cur); 1563 1564 bpf_clear_singular_ids(env, cur); 1565 1566 /* add new state to the head of linked list */ 1567 new = &new_sl->state; 1568 err = bpf_copy_verifier_state(new, cur); 1569 if (err) { 1570 bpf_free_verifier_state(new, false); 1571 kfree(new_sl); 1572 return err; 1573 } 1574 new->insn_idx = insn_idx; 1575 verifier_bug_if(new->branches != 1, env, 1576 "%s:branches_to_explore=%d insn %d", 1577 __func__, new->branches, insn_idx); 1578 err = maybe_enter_scc(env, new); 1579 if (err) { 1580 bpf_free_verifier_state(new, false); 1581 kfree(new_sl); 1582 return err; 1583 } 1584 1585 cur->parent = new; 1586 cur->first_insn_idx = insn_idx; 1587 cur->dfs_depth = new->dfs_depth + 1; 1588 bpf_clear_jmp_history(cur); 1589 list_add(&new_sl->node, head); 1590 return 0; 1591 } 1592