1 // SPDX-License-Identifier: GPL-2.0-only 2 /* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */ 3 #include <linux/bpf.h> 4 #include <linux/bpf_verifier.h> 5 #include <linux/cnum.h> 6 #include <linux/filter.h> 7 8 #define verbose(env, fmt, args...) bpf_verifier_log_write(env, fmt, ##args) 9 10 #define BPF_COMPLEXITY_LIMIT_STATES 64 11 12 static bool is_may_goto_insn_at(struct bpf_verifier_env *env, int insn_idx) 13 { 14 return bpf_is_may_goto_insn(&env->prog->insnsi[insn_idx]); 15 } 16 17 static bool is_iter_next_insn(struct bpf_verifier_env *env, int insn_idx) 18 { 19 return env->insn_aux_data[insn_idx].is_iter_next; 20 } 21 22 static void update_peak_states(struct bpf_verifier_env *env) 23 { 24 u32 cur_states; 25 26 cur_states = env->explored_states_size + env->free_list_size + env->num_backedges; 27 env->peak_states = max(env->peak_states, cur_states); 28 } 29 30 /* struct bpf_verifier_state->parent refers to states 31 * that are in either of env->{expored_states,free_list}. 32 * In both cases the state is contained in struct bpf_verifier_state_list. 33 */ 34 static struct bpf_verifier_state_list *state_parent_as_list(struct bpf_verifier_state *st) 35 { 36 if (st->parent) 37 return container_of(st->parent, struct bpf_verifier_state_list, state); 38 return NULL; 39 } 40 41 static bool incomplete_read_marks(struct bpf_verifier_env *env, 42 struct bpf_verifier_state *st); 43 44 /* A state can be freed if it is no longer referenced: 45 * - is in the env->free_list; 46 * - has no children states; 47 */ 48 static void maybe_free_verifier_state(struct bpf_verifier_env *env, 49 struct bpf_verifier_state_list *sl) 50 { 51 if (!sl->in_free_list 52 || sl->state.branches != 0 53 || incomplete_read_marks(env, &sl->state)) 54 return; 55 list_del(&sl->node); 56 bpf_free_verifier_state(&sl->state, false); 57 kfree(sl); 58 env->free_list_size--; 59 } 60 61 /* For state @st look for a topmost frame with frame_insn_idx() in some SCC, 62 * if such frame exists form a corresponding @callchain as an array of 63 * call sites leading to this frame and SCC id. 64 * E.g.: 65 * 66 * void foo() { A: loop {... SCC#1 ...}; } 67 * void bar() { B: loop { C: foo(); ... SCC#2 ... } 68 * D: loop { E: foo(); ... SCC#3 ... } } 69 * void main() { F: bar(); } 70 * 71 * @callchain at (A) would be either (F,SCC#2) or (F,SCC#3) depending 72 * on @st frame call sites being (F,C,A) or (F,E,A). 73 */ 74 static bool compute_scc_callchain(struct bpf_verifier_env *env, 75 struct bpf_verifier_state *st, 76 struct bpf_scc_callchain *callchain) 77 { 78 u32 i, scc, insn_idx; 79 80 memset(callchain, 0, sizeof(*callchain)); 81 for (i = 0; i <= st->curframe; i++) { 82 insn_idx = bpf_frame_insn_idx(st, i); 83 scc = env->insn_aux_data[insn_idx].scc; 84 if (scc) { 85 callchain->scc = scc; 86 break; 87 } else if (i < st->curframe) { 88 callchain->callsites[i] = insn_idx; 89 } else { 90 return false; 91 } 92 } 93 return true; 94 } 95 96 /* Check if bpf_scc_visit instance for @callchain exists. */ 97 static struct bpf_scc_visit *scc_visit_lookup(struct bpf_verifier_env *env, 98 struct bpf_scc_callchain *callchain) 99 { 100 struct bpf_scc_info *info = env->scc_info[callchain->scc]; 101 struct bpf_scc_visit *visits = info->visits; 102 u32 i; 103 104 if (!info) 105 return NULL; 106 for (i = 0; i < info->num_visits; i++) 107 if (memcmp(callchain, &visits[i].callchain, sizeof(*callchain)) == 0) 108 return &visits[i]; 109 return NULL; 110 } 111 112 /* Allocate a new bpf_scc_visit instance corresponding to @callchain. 113 * Allocated instances are alive for a duration of the do_check_common() 114 * call and are freed by free_states(). 115 */ 116 static struct bpf_scc_visit *scc_visit_alloc(struct bpf_verifier_env *env, 117 struct bpf_scc_callchain *callchain) 118 { 119 struct bpf_scc_visit *visit; 120 struct bpf_scc_info *info; 121 u32 scc, num_visits; 122 u64 new_sz; 123 124 scc = callchain->scc; 125 info = env->scc_info[scc]; 126 num_visits = info ? info->num_visits : 0; 127 new_sz = sizeof(*info) + sizeof(struct bpf_scc_visit) * (num_visits + 1); 128 info = kvrealloc(env->scc_info[scc], new_sz, GFP_KERNEL_ACCOUNT); 129 if (!info) 130 return NULL; 131 env->scc_info[scc] = info; 132 info->num_visits = num_visits + 1; 133 visit = &info->visits[num_visits]; 134 memset(visit, 0, sizeof(*visit)); 135 memcpy(&visit->callchain, callchain, sizeof(*callchain)); 136 return visit; 137 } 138 139 /* Form a string '(callsite#1,callsite#2,...,scc)' in env->tmp_str_buf */ 140 static char *format_callchain(struct bpf_verifier_env *env, struct bpf_scc_callchain *callchain) 141 { 142 char *buf = env->tmp_str_buf; 143 int i, delta = 0; 144 145 delta += snprintf(buf + delta, TMP_STR_BUF_LEN - delta, "("); 146 for (i = 0; i < ARRAY_SIZE(callchain->callsites); i++) { 147 if (!callchain->callsites[i]) 148 break; 149 delta += snprintf(buf + delta, TMP_STR_BUF_LEN - delta, "%u,", 150 callchain->callsites[i]); 151 } 152 delta += snprintf(buf + delta, TMP_STR_BUF_LEN - delta, "%u)", callchain->scc); 153 return env->tmp_str_buf; 154 } 155 156 /* If callchain for @st exists (@st is in some SCC), ensure that 157 * bpf_scc_visit instance for this callchain exists. 158 * If instance does not exist or is empty, assign visit->entry_state to @st. 159 */ 160 static int maybe_enter_scc(struct bpf_verifier_env *env, struct bpf_verifier_state *st) 161 { 162 struct bpf_scc_callchain *callchain = &env->callchain_buf; 163 struct bpf_scc_visit *visit; 164 165 if (!compute_scc_callchain(env, st, callchain)) 166 return 0; 167 visit = scc_visit_lookup(env, callchain); 168 visit = visit ?: scc_visit_alloc(env, callchain); 169 if (!visit) 170 return -ENOMEM; 171 if (!visit->entry_state) { 172 visit->entry_state = st; 173 if (env->log.level & BPF_LOG_LEVEL2) 174 verbose(env, "SCC enter %s\n", format_callchain(env, callchain)); 175 } 176 return 0; 177 } 178 179 static int propagate_backedges(struct bpf_verifier_env *env, struct bpf_scc_visit *visit); 180 181 /* If callchain for @st exists (@st is in some SCC), make it empty: 182 * - set visit->entry_state to NULL; 183 * - flush accumulated backedges. 184 */ 185 static int maybe_exit_scc(struct bpf_verifier_env *env, struct bpf_verifier_state *st) 186 { 187 struct bpf_scc_callchain *callchain = &env->callchain_buf; 188 struct bpf_scc_visit *visit; 189 190 if (!compute_scc_callchain(env, st, callchain)) 191 return 0; 192 visit = scc_visit_lookup(env, callchain); 193 if (!visit) { 194 /* 195 * If path traversal stops inside an SCC, corresponding bpf_scc_visit 196 * must exist for non-speculative paths. For non-speculative paths 197 * traversal stops when: 198 * a. Verification error is found, maybe_exit_scc() is not called. 199 * b. Top level BPF_EXIT is reached. Top level BPF_EXIT is not a member 200 * of any SCC. 201 * c. A checkpoint is reached and matched. Checkpoints are created by 202 * is_state_visited(), which calls maybe_enter_scc(), which allocates 203 * bpf_scc_visit instances for checkpoints within SCCs. 204 * (c) is the only case that can reach this point. 205 */ 206 if (!st->speculative) { 207 verifier_bug(env, "scc exit: no visit info for call chain %s", 208 format_callchain(env, callchain)); 209 return -EFAULT; 210 } 211 return 0; 212 } 213 if (visit->entry_state != st) 214 return 0; 215 if (env->log.level & BPF_LOG_LEVEL2) 216 verbose(env, "SCC exit %s\n", format_callchain(env, callchain)); 217 visit->entry_state = NULL; 218 env->num_backedges -= visit->num_backedges; 219 visit->num_backedges = 0; 220 update_peak_states(env); 221 return propagate_backedges(env, visit); 222 } 223 224 /* Lookup an bpf_scc_visit instance corresponding to @st callchain 225 * and add @backedge to visit->backedges. @st callchain must exist. 226 */ 227 static int add_scc_backedge(struct bpf_verifier_env *env, 228 struct bpf_verifier_state *st, 229 struct bpf_scc_backedge *backedge) 230 { 231 struct bpf_scc_callchain *callchain = &env->callchain_buf; 232 struct bpf_scc_visit *visit; 233 234 if (!compute_scc_callchain(env, st, callchain)) { 235 verifier_bug(env, "add backedge: no SCC in verification path, insn_idx %d", 236 st->insn_idx); 237 return -EFAULT; 238 } 239 visit = scc_visit_lookup(env, callchain); 240 if (!visit) { 241 verifier_bug(env, "add backedge: no visit info for call chain %s", 242 format_callchain(env, callchain)); 243 return -EFAULT; 244 } 245 if (env->log.level & BPF_LOG_LEVEL2) 246 verbose(env, "SCC backedge %s\n", format_callchain(env, callchain)); 247 backedge->next = visit->backedges; 248 visit->backedges = backedge; 249 visit->num_backedges++; 250 env->num_backedges++; 251 update_peak_states(env); 252 return 0; 253 } 254 255 /* bpf_reg_state->live marks for registers in a state @st are incomplete, 256 * if state @st is in some SCC and not all execution paths starting at this 257 * SCC are fully explored. 258 */ 259 static bool incomplete_read_marks(struct bpf_verifier_env *env, 260 struct bpf_verifier_state *st) 261 { 262 struct bpf_scc_callchain *callchain = &env->callchain_buf; 263 struct bpf_scc_visit *visit; 264 265 if (!compute_scc_callchain(env, st, callchain)) 266 return false; 267 visit = scc_visit_lookup(env, callchain); 268 if (!visit) 269 return false; 270 return !!visit->backedges; 271 } 272 273 int bpf_update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st) 274 { 275 struct bpf_verifier_state_list *sl = NULL, *parent_sl; 276 struct bpf_verifier_state *parent; 277 int err; 278 279 while (st) { 280 u32 br = --st->branches; 281 282 /* verifier_bug_if(br > 1, ...) technically makes sense here, 283 * but see comment in push_stack(), hence: 284 */ 285 verifier_bug_if((int)br < 0, env, "%s:branches_to_explore=%d", __func__, br); 286 if (br) 287 break; 288 err = maybe_exit_scc(env, st); 289 if (err) 290 return err; 291 parent = st->parent; 292 parent_sl = state_parent_as_list(st); 293 if (sl) 294 maybe_free_verifier_state(env, sl); 295 st = parent; 296 sl = parent_sl; 297 } 298 return 0; 299 } 300 301 /* check %cur's range satisfies %old's */ 302 static bool range_within(const struct bpf_reg_state *old, 303 const struct bpf_reg_state *cur) 304 { 305 return cnum64_is_subset(old->r64, cur->r64) && 306 cnum32_is_subset(old->r32, cur->r32); 307 } 308 309 /* If in the old state two registers had the same id, then they need to have 310 * the same id in the new state as well. But that id could be different from 311 * the old state, so we need to track the mapping from old to new ids. 312 * Once we have seen that, say, a reg with old id 5 had new id 9, any subsequent 313 * regs with old id 5 must also have new id 9 for the new state to be safe. But 314 * regs with a different old id could still have new id 9, we don't care about 315 * that. 316 * So we look through our idmap to see if this old id has been seen before. If 317 * so, we require the new id to match; otherwise, we add the id pair to the map. 318 */ 319 static bool check_ids(u32 old_id, u32 cur_id, struct bpf_idmap *idmap) 320 { 321 struct bpf_id_pair *map = idmap->map; 322 unsigned int i; 323 324 /* either both IDs should be set or both should be zero */ 325 if (!!old_id != !!cur_id) 326 return false; 327 328 if (old_id == 0) /* cur_id == 0 as well */ 329 return true; 330 331 for (i = 0; i < idmap->cnt; i++) { 332 if (map[i].old == old_id) 333 return map[i].cur == cur_id; 334 if (map[i].cur == cur_id) 335 return false; 336 } 337 338 /* Reached the end of known mappings; haven't seen this id before */ 339 if (idmap->cnt < BPF_ID_MAP_SIZE) { 340 map[idmap->cnt].old = old_id; 341 map[idmap->cnt].cur = cur_id; 342 idmap->cnt++; 343 return true; 344 } 345 346 /* We ran out of idmap slots, which should be impossible */ 347 WARN_ON_ONCE(1); 348 return false; 349 } 350 351 /* 352 * Compare scalar register IDs for state equivalence. 353 * 354 * When old_id == 0, the old register is independent - not linked to any 355 * other register. Any linking in the current state only adds constraints, 356 * making it more restrictive. Since the old state didn't rely on any ID 357 * relationships for this register, it's always safe to accept cur regardless 358 * of its ID. Hence, return true immediately. 359 * 360 * When old_id != 0 but cur_id == 0, we need to ensure that different 361 * independent registers in cur don't incorrectly satisfy the ID matching 362 * requirements of linked registers in old. 363 * 364 * Example: if old has r6.id=X and r7.id=X (linked), but cur has r6.id=0 365 * and r7.id=0 (both independent), without temp IDs both would map old_id=X 366 * to cur_id=0 and pass. With temp IDs: r6 maps X->temp1, r7 tries to map 367 * X->temp2, but X is already mapped to temp1, so the check fails correctly. 368 * 369 * When old_id has BPF_ADD_CONST set, the compound id (base | flag) and the 370 * base id (flag stripped) must both map consistently. Example: old has 371 * r2.id=A, r3.id=A|flag (r3 = r2 + delta), cur has r2.id=B, r3.id=C|flag 372 * (r3 derived from unrelated r4). Without the base check, idmap gets two 373 * independent entries A->B and A|flag->C|flag, missing that A->C conflicts 374 * with A->B. The base ID cross-check catches this. 375 */ 376 static bool check_scalar_ids(u32 old_id, u32 cur_id, struct bpf_idmap *idmap) 377 { 378 if (!old_id) 379 return true; 380 381 cur_id = cur_id ? cur_id : ++idmap->tmp_id_gen; 382 383 if (!check_ids(old_id, cur_id, idmap)) 384 return false; 385 if (old_id & BPF_ADD_CONST) { 386 old_id &= ~BPF_ADD_CONST; 387 cur_id &= ~BPF_ADD_CONST; 388 if (!check_ids(old_id, cur_id, idmap)) 389 return false; 390 } 391 return true; 392 } 393 394 static void __clean_func_state(struct bpf_verifier_env *env, 395 struct bpf_func_state *st, 396 u16 live_regs, int frame) 397 { 398 int i, j; 399 400 for (i = 0; i < BPF_REG_FP; i++) { 401 /* liveness must not touch this register anymore */ 402 if (!(live_regs & BIT(i))) 403 /* since the register is unused, clear its state 404 * to make further comparison simpler 405 */ 406 bpf_mark_reg_not_init(env, &st->regs[i]); 407 } 408 409 /* 410 * Clean dead 4-byte halves within each SPI independently. 411 * half_spi 2*i → lower half: slot_type[0..3] (closer to FP) 412 * half_spi 2*i+1 → upper half: slot_type[4..7] (farther from FP) 413 */ 414 for (i = 0; i < st->allocated_stack / BPF_REG_SIZE; i++) { 415 bool lo_live = bpf_stack_slot_alive(env, frame, i * 2); 416 bool hi_live = bpf_stack_slot_alive(env, frame, i * 2 + 1); 417 418 if (!hi_live || !lo_live) { 419 int start = !lo_live ? 0 : BPF_REG_SIZE / 2; 420 int end = !hi_live ? BPF_REG_SIZE : BPF_REG_SIZE / 2; 421 u8 stype = st->stack[i].slot_type[7]; 422 423 /* 424 * Don't clear special slots. 425 * destroy_if_dynptr_stack_slot() needs STACK_DYNPTR to 426 * detect overwrites and invalidate associated data slices. 427 * is_iter_reg_valid_uninit() and is_irq_flag_reg_valid_uninit() 428 * check for their respective slot types to detect double-create. 429 */ 430 if (stype == STACK_DYNPTR || stype == STACK_ITER || 431 stype == STACK_IRQ_FLAG) 432 continue; 433 434 /* 435 * Only destroy spilled_ptr when hi half is dead. 436 * If hi half is still live with STACK_SPILL, the 437 * spilled_ptr metadata is needed for correct state 438 * comparison in stacksafe(). 439 * is_spilled_reg() is using slot_type[7], but 440 * is_spilled_scalar_after() check either slot_type[0] or [4] 441 */ 442 if (!hi_live) { 443 struct bpf_reg_state *spill = &st->stack[i].spilled_ptr; 444 445 if (lo_live && stype == STACK_SPILL) { 446 u8 val = STACK_MISC; 447 448 /* 449 * 8 byte spill of scalar 0 where half slot is dead 450 * should become STACK_ZERO in lo 4 bytes. 451 */ 452 if (bpf_register_is_null(spill)) 453 val = STACK_ZERO; 454 for (j = 0; j < 4; j++) { 455 u8 *t = &st->stack[i].slot_type[j]; 456 457 if (*t == STACK_SPILL) 458 *t = val; 459 } 460 } 461 bpf_mark_reg_not_init(env, spill); 462 } 463 for (j = start; j < end; j++) 464 st->stack[i].slot_type[j] = STACK_POISON; 465 } 466 } 467 } 468 469 static int clean_verifier_state(struct bpf_verifier_env *env, 470 struct bpf_verifier_state *st) 471 { 472 int i, err; 473 474 err = bpf_live_stack_query_init(env, st); 475 if (err) 476 return err; 477 for (i = 0; i <= st->curframe; i++) { 478 u32 ip = bpf_frame_insn_idx(st, i); 479 u16 live_regs = env->insn_aux_data[ip].live_regs_before; 480 481 __clean_func_state(env, st->frame[i], live_regs, i); 482 } 483 return 0; 484 } 485 486 static bool regs_exact(const struct bpf_reg_state *rold, 487 const struct bpf_reg_state *rcur, 488 struct bpf_idmap *idmap) 489 { 490 return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 && 491 check_ids(rold->id, rcur->id, idmap) && 492 check_ids(rold->ref_obj_id, rcur->ref_obj_id, idmap); 493 } 494 495 enum exact_level { 496 NOT_EXACT, 497 EXACT, 498 RANGE_WITHIN 499 }; 500 501 /* Returns true if (rold safe implies rcur safe) */ 502 static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold, 503 struct bpf_reg_state *rcur, struct bpf_idmap *idmap, 504 enum exact_level exact) 505 { 506 if (exact == EXACT) 507 return regs_exact(rold, rcur, idmap); 508 509 if (rold->type == NOT_INIT) 510 /* explored state can't have used this */ 511 return true; 512 513 /* Enforce that register types have to match exactly, including their 514 * modifiers (like PTR_MAYBE_NULL, MEM_RDONLY, etc), as a general 515 * rule. 516 * 517 * One can make a point that using a pointer register as unbounded 518 * SCALAR would be technically acceptable, but this could lead to 519 * pointer leaks because scalars are allowed to leak while pointers 520 * are not. We could make this safe in special cases if root is 521 * calling us, but it's probably not worth the hassle. 522 * 523 * Also, register types that are *not* MAYBE_NULL could technically be 524 * safe to use as their MAYBE_NULL variants (e.g., PTR_TO_MAP_VALUE 525 * is safe to be used as PTR_TO_MAP_VALUE_OR_NULL, provided both point 526 * to the same map). 527 * However, if the old MAYBE_NULL register then got NULL checked, 528 * doing so could have affected others with the same id, and we can't 529 * check for that because we lost the id when we converted to 530 * a non-MAYBE_NULL variant. 531 * So, as a general rule we don't allow mixing MAYBE_NULL and 532 * non-MAYBE_NULL registers as well. 533 */ 534 if (rold->type != rcur->type) 535 return false; 536 537 switch (base_type(rold->type)) { 538 case SCALAR_VALUE: 539 if (env->explore_alu_limits) { 540 /* explore_alu_limits disables tnum_in() and range_within() 541 * logic and requires everything to be strict 542 */ 543 return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 && 544 check_scalar_ids(rold->id, rcur->id, idmap); 545 } 546 if (!rold->precise && exact == NOT_EXACT) 547 return true; 548 /* 549 * Linked register tracking uses rold->id to detect relationships. 550 * When rold->id == 0, the register is independent and any linking 551 * in rcur only adds constraints. When rold->id != 0, we must verify 552 * id mapping and (for BPF_ADD_CONST) offset consistency. 553 * 554 * +------------------+-----------+------------------+---------------+ 555 * | | rold->id | rold + ADD_CONST | rold->id == 0 | 556 * |------------------+-----------+------------------+---------------| 557 * | rcur->id | range,ids | false | range | 558 * | rcur + ADD_CONST | false | range,ids,off | range | 559 * | rcur->id == 0 | range,ids | false | range | 560 * +------------------+-----------+------------------+---------------+ 561 * 562 * Why check_ids() for scalar registers? 563 * 564 * Consider the following BPF code: 565 * 1: r6 = ... unbound scalar, ID=a ... 566 * 2: r7 = ... unbound scalar, ID=b ... 567 * 3: if (r6 > r7) goto +1 568 * 4: r6 = r7 569 * 5: if (r6 > X) goto ... 570 * 6: ... memory operation using r7 ... 571 * 572 * First verification path is [1-6]: 573 * - at (4) same bpf_reg_state::id (b) would be assigned to r6 and r7; 574 * - at (5) r6 would be marked <= X, sync_linked_regs() would also mark 575 * r7 <= X, because r6 and r7 share same id. 576 * Next verification path is [1-4, 6]. 577 * 578 * Instruction (6) would be reached in two states: 579 * I. r6{.id=b}, r7{.id=b} via path 1-6; 580 * II. r6{.id=a}, r7{.id=b} via path 1-4, 6. 581 * 582 * Use check_ids() to distinguish these states. 583 * --- 584 * Also verify that new value satisfies old value range knowledge. 585 */ 586 587 /* 588 * ADD_CONST flags must match exactly: BPF_ADD_CONST32 and 589 * BPF_ADD_CONST64 have different linking semantics in 590 * sync_linked_regs() (alu32 zero-extends, alu64 does not), 591 * so pruning across different flag types is unsafe. 592 */ 593 if (rold->id && 594 (rold->id & BPF_ADD_CONST) != (rcur->id & BPF_ADD_CONST)) 595 return false; 596 597 /* Both have offset linkage: offsets must match */ 598 if ((rold->id & BPF_ADD_CONST) && rold->delta != rcur->delta) 599 return false; 600 601 if (!check_scalar_ids(rold->id, rcur->id, idmap)) 602 return false; 603 604 return range_within(rold, rcur) && tnum_in(rold->var_off, rcur->var_off); 605 case PTR_TO_MAP_KEY: 606 case PTR_TO_MAP_VALUE: 607 case PTR_TO_MEM: 608 case PTR_TO_BUF: 609 case PTR_TO_TP_BUFFER: 610 /* If the new min/max/var_off satisfy the old ones and 611 * everything else matches, we are OK. 612 */ 613 return memcmp(rold, rcur, offsetof(struct bpf_reg_state, var_off)) == 0 && 614 range_within(rold, rcur) && 615 tnum_in(rold->var_off, rcur->var_off) && 616 check_ids(rold->id, rcur->id, idmap) && 617 check_ids(rold->ref_obj_id, rcur->ref_obj_id, idmap); 618 case PTR_TO_PACKET_META: 619 case PTR_TO_PACKET: 620 /* We must have at least as much range as the old ptr 621 * did, so that any accesses which were safe before are 622 * still safe. This is true even if old range < old off, 623 * since someone could have accessed through (ptr - k), or 624 * even done ptr -= k in a register, to get a safe access. 625 */ 626 if (rold->range < 0 || rcur->range < 0) { 627 /* special case for [BEYOND|AT]_PKT_END */ 628 if (rold->range != rcur->range) 629 return false; 630 } else if (rold->range > rcur->range) { 631 return false; 632 } 633 /* id relations must be preserved */ 634 if (!check_ids(rold->id, rcur->id, idmap)) 635 return false; 636 /* new val must satisfy old val knowledge */ 637 return range_within(rold, rcur) && 638 tnum_in(rold->var_off, rcur->var_off); 639 case PTR_TO_STACK: 640 /* two stack pointers are equal only if they're pointing to 641 * the same stack frame, since fp-8 in foo != fp-8 in bar 642 */ 643 return regs_exact(rold, rcur, idmap) && rold->frameno == rcur->frameno; 644 case PTR_TO_ARENA: 645 return true; 646 case PTR_TO_INSN: 647 return memcmp(rold, rcur, offsetof(struct bpf_reg_state, var_off)) == 0 && 648 range_within(rold, rcur) && tnum_in(rold->var_off, rcur->var_off); 649 default: 650 return regs_exact(rold, rcur, idmap); 651 } 652 } 653 654 static struct bpf_reg_state unbound_reg; 655 656 static __init int unbound_reg_init(void) 657 { 658 bpf_mark_reg_unknown_imprecise(&unbound_reg); 659 return 0; 660 } 661 late_initcall(unbound_reg_init); 662 663 static bool is_spilled_scalar_after(const struct bpf_stack_state *stack, int im) 664 { 665 return stack->slot_type[im] == STACK_SPILL && 666 stack->spilled_ptr.type == SCALAR_VALUE; 667 } 668 669 static bool is_stack_misc_after(struct bpf_verifier_env *env, 670 struct bpf_stack_state *stack, int im) 671 { 672 u32 i; 673 674 for (i = im; i < ARRAY_SIZE(stack->slot_type); ++i) { 675 if ((stack->slot_type[i] == STACK_MISC) || 676 ((stack->slot_type[i] == STACK_INVALID || stack->slot_type[i] == STACK_POISON) && 677 env->allow_uninit_stack)) 678 continue; 679 return false; 680 } 681 682 return true; 683 } 684 685 static struct bpf_reg_state *scalar_reg_for_stack(struct bpf_verifier_env *env, 686 struct bpf_stack_state *stack, int im) 687 { 688 if (is_spilled_scalar_after(stack, im)) 689 return &stack->spilled_ptr; 690 691 if (is_stack_misc_after(env, stack, im)) 692 return &unbound_reg; 693 694 return NULL; 695 } 696 697 static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old, 698 struct bpf_func_state *cur, struct bpf_idmap *idmap, 699 enum exact_level exact) 700 { 701 int i, spi; 702 703 /* walk slots of the explored stack and ignore any additional 704 * slots in the current stack, since explored(safe) state 705 * didn't use them 706 */ 707 for (i = 0; i < old->allocated_stack; i++) { 708 struct bpf_reg_state *old_reg, *cur_reg; 709 int im = i % BPF_REG_SIZE; 710 711 spi = i / BPF_REG_SIZE; 712 713 if (exact == EXACT) { 714 u8 old_type = old->stack[spi].slot_type[i % BPF_REG_SIZE]; 715 u8 cur_type = i < cur->allocated_stack ? 716 cur->stack[spi].slot_type[i % BPF_REG_SIZE] : STACK_INVALID; 717 718 /* STACK_INVALID and STACK_POISON are equivalent for pruning */ 719 if (old_type == STACK_POISON) 720 old_type = STACK_INVALID; 721 if (cur_type == STACK_POISON) 722 cur_type = STACK_INVALID; 723 if (i >= cur->allocated_stack || old_type != cur_type) 724 return false; 725 } 726 727 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID || 728 old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_POISON) 729 continue; 730 731 if (env->allow_uninit_stack && 732 old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC) 733 continue; 734 735 /* explored stack has more populated slots than current stack 736 * and these slots were used 737 */ 738 if (i >= cur->allocated_stack) 739 return false; 740 741 /* 742 * 64 and 32-bit scalar spills vs MISC/INVALID slots and vice versa. 743 * Load from MISC/INVALID slots produces unbound scalar. 744 * Construct a fake register for such stack and call 745 * regsafe() to ensure scalar ids are compared. 746 */ 747 if (im == 0 || im == 4) { 748 old_reg = scalar_reg_for_stack(env, &old->stack[spi], im); 749 cur_reg = scalar_reg_for_stack(env, &cur->stack[spi], im); 750 if (old_reg && cur_reg) { 751 if (!regsafe(env, old_reg, cur_reg, idmap, exact)) 752 return false; 753 i += (im == 0 ? BPF_REG_SIZE - 1 : 3); 754 continue; 755 } 756 } 757 758 /* if old state was safe with misc data in the stack 759 * it will be safe with zero-initialized stack. 760 * The opposite is not true 761 */ 762 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC && 763 cur->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_ZERO) 764 continue; 765 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] != 766 cur->stack[spi].slot_type[i % BPF_REG_SIZE]) 767 /* Ex: old explored (safe) state has STACK_SPILL in 768 * this stack slot, but current has STACK_MISC -> 769 * this verifier states are not equivalent, 770 * return false to continue verification of this path 771 */ 772 return false; 773 if (i % BPF_REG_SIZE != BPF_REG_SIZE - 1) 774 continue; 775 /* Both old and cur are having same slot_type */ 776 switch (old->stack[spi].slot_type[BPF_REG_SIZE - 1]) { 777 case STACK_SPILL: 778 /* when explored and current stack slot are both storing 779 * spilled registers, check that stored pointers types 780 * are the same as well. 781 * Ex: explored safe path could have stored 782 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8} 783 * but current path has stored: 784 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16} 785 * such verifier states are not equivalent. 786 * return false to continue verification of this path 787 */ 788 if (!regsafe(env, &old->stack[spi].spilled_ptr, 789 &cur->stack[spi].spilled_ptr, idmap, exact)) 790 return false; 791 break; 792 case STACK_DYNPTR: 793 old_reg = &old->stack[spi].spilled_ptr; 794 cur_reg = &cur->stack[spi].spilled_ptr; 795 if (old_reg->dynptr.type != cur_reg->dynptr.type || 796 old_reg->dynptr.first_slot != cur_reg->dynptr.first_slot || 797 !check_ids(old_reg->ref_obj_id, cur_reg->ref_obj_id, idmap)) 798 return false; 799 break; 800 case STACK_ITER: 801 old_reg = &old->stack[spi].spilled_ptr; 802 cur_reg = &cur->stack[spi].spilled_ptr; 803 /* iter.depth is not compared between states as it 804 * doesn't matter for correctness and would otherwise 805 * prevent convergence; we maintain it only to prevent 806 * infinite loop check triggering, see 807 * iter_active_depths_differ() 808 */ 809 if (old_reg->iter.btf != cur_reg->iter.btf || 810 old_reg->iter.btf_id != cur_reg->iter.btf_id || 811 old_reg->iter.state != cur_reg->iter.state || 812 /* ignore {old_reg,cur_reg}->iter.depth, see above */ 813 !check_ids(old_reg->ref_obj_id, cur_reg->ref_obj_id, idmap)) 814 return false; 815 break; 816 case STACK_IRQ_FLAG: 817 old_reg = &old->stack[spi].spilled_ptr; 818 cur_reg = &cur->stack[spi].spilled_ptr; 819 if (!check_ids(old_reg->ref_obj_id, cur_reg->ref_obj_id, idmap) || 820 old_reg->irq.kfunc_class != cur_reg->irq.kfunc_class) 821 return false; 822 break; 823 case STACK_MISC: 824 case STACK_ZERO: 825 case STACK_INVALID: 826 case STACK_POISON: 827 continue; 828 /* Ensure that new unhandled slot types return false by default */ 829 default: 830 return false; 831 } 832 } 833 return true; 834 } 835 836 /* 837 * Compare stack arg slots between old and current states. 838 * Outgoing stack args are path-local state and must agree for pruning. 839 */ 840 static bool stack_arg_safe(struct bpf_verifier_env *env, struct bpf_func_state *old, 841 struct bpf_func_state *cur, struct bpf_idmap *idmap, 842 enum exact_level exact) 843 { 844 int i, nslots; 845 846 nslots = max(old->out_stack_arg_cnt, cur->out_stack_arg_cnt); 847 for (i = 0; i < nslots; i++) { 848 struct bpf_reg_state *old_arg, *cur_arg; 849 struct bpf_reg_state not_init = { .type = NOT_INIT }; 850 851 old_arg = i < old->out_stack_arg_cnt ? 852 &old->stack_arg_regs[i] : ¬_init; 853 cur_arg = i < cur->out_stack_arg_cnt ? 854 &cur->stack_arg_regs[i] : ¬_init; 855 if (!regsafe(env, old_arg, cur_arg, idmap, exact)) 856 return false; 857 } 858 859 return true; 860 } 861 862 static bool refsafe(struct bpf_verifier_state *old, struct bpf_verifier_state *cur, 863 struct bpf_idmap *idmap) 864 { 865 int i; 866 867 if (old->acquired_refs != cur->acquired_refs) 868 return false; 869 870 if (old->active_locks != cur->active_locks) 871 return false; 872 873 if (old->active_preempt_locks != cur->active_preempt_locks) 874 return false; 875 876 if (old->active_rcu_locks != cur->active_rcu_locks) 877 return false; 878 879 if (!check_ids(old->active_irq_id, cur->active_irq_id, idmap)) 880 return false; 881 882 if (!check_ids(old->active_lock_id, cur->active_lock_id, idmap) || 883 old->active_lock_ptr != cur->active_lock_ptr) 884 return false; 885 886 for (i = 0; i < old->acquired_refs; i++) { 887 if (!check_ids(old->refs[i].id, cur->refs[i].id, idmap) || 888 old->refs[i].type != cur->refs[i].type) 889 return false; 890 switch (old->refs[i].type) { 891 case REF_TYPE_PTR: 892 case REF_TYPE_IRQ: 893 break; 894 case REF_TYPE_LOCK: 895 case REF_TYPE_RES_LOCK: 896 case REF_TYPE_RES_LOCK_IRQ: 897 if (old->refs[i].ptr != cur->refs[i].ptr) 898 return false; 899 break; 900 default: 901 WARN_ONCE(1, "Unhandled enum type for reference state: %d\n", old->refs[i].type); 902 return false; 903 } 904 } 905 906 return true; 907 } 908 909 /* compare two verifier states 910 * 911 * all states stored in state_list are known to be valid, since 912 * verifier reached 'bpf_exit' instruction through them 913 * 914 * this function is called when verifier exploring different branches of 915 * execution popped from the state stack. If it sees an old state that has 916 * more strict register state and more strict stack state then this execution 917 * branch doesn't need to be explored further, since verifier already 918 * concluded that more strict state leads to valid finish. 919 * 920 * Therefore two states are equivalent if register state is more conservative 921 * and explored stack state is more conservative than the current one. 922 * Example: 923 * explored current 924 * (slot1=INV slot2=MISC) == (slot1=MISC slot2=MISC) 925 * (slot1=MISC slot2=MISC) != (slot1=INV slot2=MISC) 926 * 927 * In other words if current stack state (one being explored) has more 928 * valid slots than old one that already passed validation, it means 929 * the verifier can stop exploring and conclude that current state is valid too 930 * 931 * Similarly with registers. If explored state has register type as invalid 932 * whereas register type in current state is meaningful, it means that 933 * the current state will reach 'bpf_exit' instruction safely 934 */ 935 static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_state *old, 936 struct bpf_func_state *cur, u32 insn_idx, enum exact_level exact) 937 { 938 u16 live_regs = env->insn_aux_data[insn_idx].live_regs_before; 939 u16 i; 940 941 if (old->callback_depth > cur->callback_depth) 942 return false; 943 944 if (!old->no_stack_arg_load && cur->no_stack_arg_load) 945 return false; 946 947 for (i = 0; i < MAX_BPF_REG; i++) 948 if (((1 << i) & live_regs) && 949 !regsafe(env, &old->regs[i], &cur->regs[i], 950 &env->idmap_scratch, exact)) 951 return false; 952 953 if (!stacksafe(env, old, cur, &env->idmap_scratch, exact)) 954 return false; 955 956 if (!stack_arg_safe(env, old, cur, &env->idmap_scratch, exact)) 957 return false; 958 959 return true; 960 } 961 962 static void reset_idmap_scratch(struct bpf_verifier_env *env) 963 { 964 struct bpf_idmap *idmap = &env->idmap_scratch; 965 966 idmap->tmp_id_gen = env->id_gen; 967 idmap->cnt = 0; 968 } 969 970 static bool states_equal(struct bpf_verifier_env *env, 971 struct bpf_verifier_state *old, 972 struct bpf_verifier_state *cur, 973 enum exact_level exact) 974 { 975 u32 insn_idx; 976 int i; 977 978 if (old->curframe != cur->curframe) 979 return false; 980 981 reset_idmap_scratch(env); 982 983 /* Verification state from speculative execution simulation 984 * must never prune a non-speculative execution one. 985 */ 986 if (old->speculative && !cur->speculative) 987 return false; 988 989 if (old->in_sleepable != cur->in_sleepable) 990 return false; 991 992 if (!refsafe(old, cur, &env->idmap_scratch)) 993 return false; 994 995 /* for states to be equal callsites have to be the same 996 * and all frame states need to be equivalent 997 */ 998 for (i = 0; i <= old->curframe; i++) { 999 insn_idx = bpf_frame_insn_idx(old, i); 1000 if (old->frame[i]->callsite != cur->frame[i]->callsite) 1001 return false; 1002 if (!func_states_equal(env, old->frame[i], cur->frame[i], insn_idx, exact)) 1003 return false; 1004 } 1005 return true; 1006 } 1007 1008 /* find precise scalars in the previous equivalent state and 1009 * propagate them into the current state 1010 */ 1011 static int propagate_precision(struct bpf_verifier_env *env, 1012 const struct bpf_verifier_state *old, 1013 struct bpf_verifier_state *cur, 1014 bool *changed) 1015 { 1016 struct bpf_reg_state *state_reg; 1017 struct bpf_func_state *state; 1018 int i, err = 0, fr; 1019 bool first; 1020 1021 for (fr = old->curframe; fr >= 0; fr--) { 1022 state = old->frame[fr]; 1023 state_reg = state->regs; 1024 first = true; 1025 for (i = 0; i < BPF_REG_FP; i++, state_reg++) { 1026 if (state_reg->type != SCALAR_VALUE || 1027 !state_reg->precise) 1028 continue; 1029 if (env->log.level & BPF_LOG_LEVEL2) { 1030 if (first) 1031 verbose(env, "frame %d: propagating r%d", fr, i); 1032 else 1033 verbose(env, ",r%d", i); 1034 } 1035 bpf_bt_set_frame_reg(&env->bt, fr, i); 1036 first = false; 1037 } 1038 1039 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) { 1040 if (!bpf_is_spilled_reg(&state->stack[i])) 1041 continue; 1042 state_reg = &state->stack[i].spilled_ptr; 1043 if (state_reg->type != SCALAR_VALUE || 1044 !state_reg->precise) 1045 continue; 1046 if (env->log.level & BPF_LOG_LEVEL2) { 1047 if (first) 1048 verbose(env, "frame %d: propagating fp%d", 1049 fr, (-i - 1) * BPF_REG_SIZE); 1050 else 1051 verbose(env, ",fp%d", (-i - 1) * BPF_REG_SIZE); 1052 } 1053 bpf_bt_set_frame_slot(&env->bt, fr, i); 1054 first = false; 1055 } 1056 if (!first && (env->log.level & BPF_LOG_LEVEL2)) 1057 verbose(env, "\n"); 1058 } 1059 1060 err = bpf_mark_chain_precision(env, cur, -1, changed); 1061 if (err < 0) 1062 return err; 1063 1064 return 0; 1065 } 1066 1067 #define MAX_BACKEDGE_ITERS 64 1068 1069 /* Propagate read and precision marks from visit->backedges[*].state->equal_state 1070 * to corresponding parent states of visit->backedges[*].state until fixed point is reached, 1071 * then free visit->backedges. 1072 * After execution of this function incomplete_read_marks() will return false 1073 * for all states corresponding to @visit->callchain. 1074 */ 1075 static int propagate_backedges(struct bpf_verifier_env *env, struct bpf_scc_visit *visit) 1076 { 1077 struct bpf_scc_backedge *backedge; 1078 struct bpf_verifier_state *st; 1079 bool changed; 1080 int i, err; 1081 1082 i = 0; 1083 do { 1084 if (i++ > MAX_BACKEDGE_ITERS) { 1085 if (env->log.level & BPF_LOG_LEVEL2) 1086 verbose(env, "%s: too many iterations\n", __func__); 1087 for (backedge = visit->backedges; backedge; backedge = backedge->next) 1088 bpf_mark_all_scalars_precise(env, &backedge->state); 1089 break; 1090 } 1091 changed = false; 1092 for (backedge = visit->backedges; backedge; backedge = backedge->next) { 1093 st = &backedge->state; 1094 err = propagate_precision(env, st->equal_state, st, &changed); 1095 if (err) 1096 return err; 1097 } 1098 } while (changed); 1099 1100 bpf_free_backedges(visit); 1101 return 0; 1102 } 1103 1104 static bool states_maybe_looping(struct bpf_verifier_state *old, 1105 struct bpf_verifier_state *cur) 1106 { 1107 struct bpf_func_state *fold, *fcur; 1108 int i, fr = cur->curframe; 1109 1110 if (old->curframe != fr) 1111 return false; 1112 1113 fold = old->frame[fr]; 1114 fcur = cur->frame[fr]; 1115 for (i = 0; i < MAX_BPF_REG; i++) 1116 if (memcmp(&fold->regs[i], &fcur->regs[i], 1117 offsetof(struct bpf_reg_state, frameno))) 1118 return false; 1119 return true; 1120 } 1121 1122 /* is_state_visited() handles iter_next() (see process_iter_next_call() for 1123 * terminology) calls specially: as opposed to bounded BPF loops, it *expects* 1124 * states to match, which otherwise would look like an infinite loop. So while 1125 * iter_next() calls are taken care of, we still need to be careful and 1126 * prevent erroneous and too eager declaration of "infinite loop", when 1127 * iterators are involved. 1128 * 1129 * Here's a situation in pseudo-BPF assembly form: 1130 * 1131 * 0: again: ; set up iter_next() call args 1132 * 1: r1 = &it ; <CHECKPOINT HERE> 1133 * 2: call bpf_iter_num_next ; this is iter_next() call 1134 * 3: if r0 == 0 goto done 1135 * 4: ... something useful here ... 1136 * 5: goto again ; another iteration 1137 * 6: done: 1138 * 7: r1 = &it 1139 * 8: call bpf_iter_num_destroy ; clean up iter state 1140 * 9: exit 1141 * 1142 * This is a typical loop. Let's assume that we have a prune point at 1:, 1143 * before we get to `call bpf_iter_num_next` (e.g., because of that `goto 1144 * again`, assuming other heuristics don't get in a way). 1145 * 1146 * When we first time come to 1:, let's say we have some state X. We proceed 1147 * to 2:, fork states, enqueue ACTIVE, validate NULL case successfully, exit. 1148 * Now we come back to validate that forked ACTIVE state. We proceed through 1149 * 3-5, come to goto, jump to 1:. Let's assume our state didn't change, so we 1150 * are converging. But the problem is that we don't know that yet, as this 1151 * convergence has to happen at iter_next() call site only. So if nothing is 1152 * done, at 1: verifier will use bounded loop logic and declare infinite 1153 * looping (and would be *technically* correct, if not for iterator's 1154 * "eventual sticky NULL" contract, see process_iter_next_call()). But we 1155 * don't want that. So what we do in process_iter_next_call() when we go on 1156 * another ACTIVE iteration, we bump slot->iter.depth, to mark that it's 1157 * a different iteration. So when we suspect an infinite loop, we additionally 1158 * check if any of the *ACTIVE* iterator states depths differ. If yes, we 1159 * pretend we are not looping and wait for next iter_next() call. 1160 * 1161 * This only applies to ACTIVE state. In DRAINED state we don't expect to 1162 * loop, because that would actually mean infinite loop, as DRAINED state is 1163 * "sticky", and so we'll keep returning into the same instruction with the 1164 * same state (at least in one of possible code paths). 1165 * 1166 * This approach allows to keep infinite loop heuristic even in the face of 1167 * active iterator. E.g., C snippet below is and will be detected as 1168 * infinitely looping: 1169 * 1170 * struct bpf_iter_num it; 1171 * int *p, x; 1172 * 1173 * bpf_iter_num_new(&it, 0, 10); 1174 * while ((p = bpf_iter_num_next(&t))) { 1175 * x = p; 1176 * while (x--) {} // <<-- infinite loop here 1177 * } 1178 * 1179 */ 1180 static bool iter_active_depths_differ(struct bpf_verifier_state *old, struct bpf_verifier_state *cur) 1181 { 1182 struct bpf_reg_state *slot, *cur_slot; 1183 struct bpf_func_state *state; 1184 int i, fr; 1185 1186 for (fr = old->curframe; fr >= 0; fr--) { 1187 state = old->frame[fr]; 1188 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) { 1189 if (state->stack[i].slot_type[0] != STACK_ITER) 1190 continue; 1191 1192 slot = &state->stack[i].spilled_ptr; 1193 if (slot->iter.state != BPF_ITER_STATE_ACTIVE) 1194 continue; 1195 1196 cur_slot = &cur->frame[fr]->stack[i].spilled_ptr; 1197 if (cur_slot->iter.depth != slot->iter.depth) 1198 return true; 1199 } 1200 } 1201 return false; 1202 } 1203 1204 static void mark_all_scalars_imprecise(struct bpf_verifier_env *env, struct bpf_verifier_state *st) 1205 { 1206 struct bpf_func_state *func; 1207 struct bpf_reg_state *reg; 1208 int i, j; 1209 1210 for (i = 0; i <= st->curframe; i++) { 1211 func = st->frame[i]; 1212 for (j = 0; j < BPF_REG_FP; j++) { 1213 reg = &func->regs[j]; 1214 if (reg->type != SCALAR_VALUE) 1215 continue; 1216 reg->precise = false; 1217 } 1218 for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) { 1219 if (!bpf_is_spilled_reg(&func->stack[j])) 1220 continue; 1221 reg = &func->stack[j].spilled_ptr; 1222 if (reg->type != SCALAR_VALUE) 1223 continue; 1224 reg->precise = false; 1225 } 1226 } 1227 } 1228 1229 int bpf_is_state_visited(struct bpf_verifier_env *env, int insn_idx) 1230 { 1231 struct bpf_verifier_state_list *new_sl; 1232 struct bpf_verifier_state_list *sl; 1233 struct bpf_verifier_state *cur = env->cur_state, *new; 1234 bool force_new_state, add_new_state, loop; 1235 int n, err, states_cnt = 0; 1236 struct list_head *pos, *tmp, *head; 1237 1238 force_new_state = env->test_state_freq || bpf_is_force_checkpoint(env, insn_idx) || 1239 /* Avoid accumulating infinitely long jmp history */ 1240 cur->jmp_history_cnt > 40; 1241 1242 /* bpf progs typically have pruning point every 4 instructions 1243 * http://vger.kernel.org/bpfconf2019.html#session-1 1244 * Do not add new state for future pruning if the verifier hasn't seen 1245 * at least 2 jumps and at least 8 instructions. 1246 * This heuristics helps decrease 'total_states' and 'peak_states' metric. 1247 * In tests that amounts to up to 50% reduction into total verifier 1248 * memory consumption and 20% verifier time speedup. 1249 */ 1250 add_new_state = force_new_state; 1251 if (env->jmps_processed - env->prev_jmps_processed >= 2 && 1252 env->insn_processed - env->prev_insn_processed >= 8) 1253 add_new_state = true; 1254 1255 /* keep cleaning the current state as registers/stack become dead */ 1256 err = clean_verifier_state(env, cur); 1257 if (err) 1258 return err; 1259 1260 loop = false; 1261 head = bpf_explored_state(env, insn_idx); 1262 list_for_each_safe(pos, tmp, head) { 1263 sl = container_of(pos, struct bpf_verifier_state_list, node); 1264 states_cnt++; 1265 if (sl->state.insn_idx != insn_idx) 1266 continue; 1267 1268 if (sl->state.branches) { 1269 struct bpf_func_state *frame = sl->state.frame[sl->state.curframe]; 1270 1271 if (frame->in_async_callback_fn && 1272 frame->async_entry_cnt != cur->frame[cur->curframe]->async_entry_cnt) { 1273 /* Different async_entry_cnt means that the verifier is 1274 * processing another entry into async callback. 1275 * Seeing the same state is not an indication of infinite 1276 * loop or infinite recursion. 1277 * But finding the same state doesn't mean that it's safe 1278 * to stop processing the current state. The previous state 1279 * hasn't yet reached bpf_exit, since state.branches > 0. 1280 * Checking in_async_callback_fn alone is not enough either. 1281 * Since the verifier still needs to catch infinite loops 1282 * inside async callbacks. 1283 */ 1284 goto skip_inf_loop_check; 1285 } 1286 /* BPF open-coded iterators loop detection is special. 1287 * states_maybe_looping() logic is too simplistic in detecting 1288 * states that *might* be equivalent, because it doesn't know 1289 * about ID remapping, so don't even perform it. 1290 * See process_iter_next_call() and iter_active_depths_differ() 1291 * for overview of the logic. When current and one of parent 1292 * states are detected as equivalent, it's a good thing: we prove 1293 * convergence and can stop simulating further iterations. 1294 * It's safe to assume that iterator loop will finish, taking into 1295 * account iter_next() contract of eventually returning 1296 * sticky NULL result. 1297 * 1298 * Note, that states have to be compared exactly in this case because 1299 * read and precision marks might not be finalized inside the loop. 1300 * E.g. as in the program below: 1301 * 1302 * 1. r7 = -16 1303 * 2. r6 = bpf_get_prandom_u32() 1304 * 3. while (bpf_iter_num_next(&fp[-8])) { 1305 * 4. if (r6 != 42) { 1306 * 5. r7 = -32 1307 * 6. r6 = bpf_get_prandom_u32() 1308 * 7. continue 1309 * 8. } 1310 * 9. r0 = r10 1311 * 10. r0 += r7 1312 * 11. r8 = *(u64 *)(r0 + 0) 1313 * 12. r6 = bpf_get_prandom_u32() 1314 * 13. } 1315 * 1316 * Here verifier would first visit path 1-3, create a checkpoint at 3 1317 * with r7=-16, continue to 4-7,3. Existing checkpoint at 3 does 1318 * not have read or precision mark for r7 yet, thus inexact states 1319 * comparison would discard current state with r7=-32 1320 * => unsafe memory access at 11 would not be caught. 1321 */ 1322 if (is_iter_next_insn(env, insn_idx)) { 1323 if (states_equal(env, &sl->state, cur, RANGE_WITHIN)) { 1324 struct bpf_func_state *cur_frame; 1325 struct bpf_reg_state *iter_state, *iter_reg; 1326 int spi; 1327 1328 cur_frame = cur->frame[cur->curframe]; 1329 /* btf_check_iter_kfuncs() enforces that 1330 * iter state pointer is always the first arg 1331 */ 1332 iter_reg = &cur_frame->regs[BPF_REG_1]; 1333 /* current state is valid due to states_equal(), 1334 * so we can assume valid iter and reg state, 1335 * no need for extra (re-)validations 1336 */ 1337 spi = bpf_get_spi(iter_reg->var_off.value); 1338 iter_state = &bpf_func(env, iter_reg)->stack[spi].spilled_ptr; 1339 if (iter_state->iter.state == BPF_ITER_STATE_ACTIVE) { 1340 loop = true; 1341 goto hit; 1342 } 1343 } 1344 goto skip_inf_loop_check; 1345 } 1346 if (is_may_goto_insn_at(env, insn_idx)) { 1347 if (sl->state.may_goto_depth != cur->may_goto_depth && 1348 states_equal(env, &sl->state, cur, RANGE_WITHIN)) { 1349 loop = true; 1350 goto hit; 1351 } 1352 } 1353 if (bpf_calls_callback(env, insn_idx)) { 1354 if (states_equal(env, &sl->state, cur, RANGE_WITHIN)) { 1355 loop = true; 1356 goto hit; 1357 } 1358 goto skip_inf_loop_check; 1359 } 1360 /* attempt to detect infinite loop to avoid unnecessary doomed work */ 1361 if (states_maybe_looping(&sl->state, cur) && 1362 states_equal(env, &sl->state, cur, EXACT) && 1363 !iter_active_depths_differ(&sl->state, cur) && 1364 sl->state.may_goto_depth == cur->may_goto_depth && 1365 sl->state.callback_unroll_depth == cur->callback_unroll_depth) { 1366 verbose_linfo(env, insn_idx, "; "); 1367 verbose(env, "infinite loop detected at insn %d\n", insn_idx); 1368 verbose(env, "cur state:"); 1369 print_verifier_state(env, cur, cur->curframe, true); 1370 verbose(env, "old state:"); 1371 print_verifier_state(env, &sl->state, cur->curframe, true); 1372 return -EINVAL; 1373 } 1374 /* if the verifier is processing a loop, avoid adding new state 1375 * too often, since different loop iterations have distinct 1376 * states and may not help future pruning. 1377 * This threshold shouldn't be too low to make sure that 1378 * a loop with large bound will be rejected quickly. 1379 * The most abusive loop will be: 1380 * r1 += 1 1381 * if r1 < 1000000 goto pc-2 1382 * 1M insn_procssed limit / 100 == 10k peak states. 1383 * This threshold shouldn't be too high either, since states 1384 * at the end of the loop are likely to be useful in pruning. 1385 */ 1386 skip_inf_loop_check: 1387 if (!force_new_state && 1388 env->jmps_processed - env->prev_jmps_processed < 20 && 1389 env->insn_processed - env->prev_insn_processed < 100) 1390 add_new_state = false; 1391 goto miss; 1392 } 1393 /* See comments for mark_all_regs_read_and_precise() */ 1394 loop = incomplete_read_marks(env, &sl->state); 1395 if (states_equal(env, &sl->state, cur, loop ? RANGE_WITHIN : NOT_EXACT)) { 1396 hit: 1397 sl->hit_cnt++; 1398 1399 /* if previous state reached the exit with precision and 1400 * current state is equivalent to it (except precision marks) 1401 * the precision needs to be propagated back in 1402 * the current state. 1403 */ 1404 err = 0; 1405 if (bpf_is_jmp_point(env, env->insn_idx)) 1406 err = bpf_push_jmp_history(env, cur, 0, 0, 0, 0); 1407 err = err ? : propagate_precision(env, &sl->state, cur, NULL); 1408 if (err) 1409 return err; 1410 /* When processing iterator based loops above propagate_liveness and 1411 * propagate_precision calls are not sufficient to transfer all relevant 1412 * read and precision marks. E.g. consider the following case: 1413 * 1414 * .-> A --. Assume the states are visited in the order A, B, C. 1415 * | | | Assume that state B reaches a state equivalent to state A. 1416 * | v v At this point, state C is not processed yet, so state A 1417 * '-- B C has not received any read or precision marks from C. 1418 * Thus, marks propagated from A to B are incomplete. 1419 * 1420 * The verifier mitigates this by performing the following steps: 1421 * 1422 * - Prior to the main verification pass, strongly connected components 1423 * (SCCs) are computed over the program's control flow graph, 1424 * intraprocedurally. 1425 * 1426 * - During the main verification pass, `maybe_enter_scc()` checks 1427 * whether the current verifier state is entering an SCC. If so, an 1428 * instance of a `bpf_scc_visit` object is created, and the state 1429 * entering the SCC is recorded as the entry state. 1430 * 1431 * - This instance is associated not with the SCC itself, but with a 1432 * `bpf_scc_callchain`: a tuple consisting of the call sites leading to 1433 * the SCC and the SCC id. See `compute_scc_callchain()`. 1434 * 1435 * - When a verification path encounters a `states_equal(..., 1436 * RANGE_WITHIN)` condition, there exists a call chain describing the 1437 * current state and a corresponding `bpf_scc_visit` instance. A copy 1438 * of the current state is created and added to 1439 * `bpf_scc_visit->backedges`. 1440 * 1441 * - When a verification path terminates, `maybe_exit_scc()` is called 1442 * from `bpf_update_branch_counts()`. For states with `branches == 0`, it 1443 * checks whether the state is the entry state of any `bpf_scc_visit` 1444 * instance. If it is, this indicates that all paths originating from 1445 * this SCC visit have been explored. `propagate_backedges()` is then 1446 * called, which propagates read and precision marks through the 1447 * backedges until a fixed point is reached. 1448 * (In the earlier example, this would propagate marks from A to B, 1449 * from C to A, and then again from A to B.) 1450 * 1451 * A note on callchains 1452 * -------------------- 1453 * 1454 * Consider the following example: 1455 * 1456 * void foo() { loop { ... SCC#1 ... } } 1457 * void main() { 1458 * A: foo(); 1459 * B: ... 1460 * C: foo(); 1461 * } 1462 * 1463 * Here, there are two distinct callchains leading to SCC#1: 1464 * - (A, SCC#1) 1465 * - (C, SCC#1) 1466 * 1467 * Each callchain identifies a separate `bpf_scc_visit` instance that 1468 * accumulates backedge states. The `propagate_{liveness,precision}()` 1469 * functions traverse the parent state of each backedge state, which 1470 * means these parent states must remain valid (i.e., not freed) while 1471 * the corresponding `bpf_scc_visit` instance exists. 1472 * 1473 * Associating `bpf_scc_visit` instances directly with SCCs instead of 1474 * callchains would break this invariant: 1475 * - States explored during `C: foo()` would contribute backedges to 1476 * SCC#1, but SCC#1 would only be exited once the exploration of 1477 * `A: foo()` completes. 1478 * - By that time, the states explored between `A: foo()` and `C: foo()` 1479 * (i.e., `B: ...`) may have already been freed, causing the parent 1480 * links for states from `C: foo()` to become invalid. 1481 */ 1482 if (loop) { 1483 struct bpf_scc_backedge *backedge; 1484 1485 backedge = kzalloc_obj(*backedge, 1486 GFP_KERNEL_ACCOUNT); 1487 if (!backedge) 1488 return -ENOMEM; 1489 err = bpf_copy_verifier_state(&backedge->state, cur); 1490 backedge->state.equal_state = &sl->state; 1491 backedge->state.insn_idx = insn_idx; 1492 err = err ?: add_scc_backedge(env, &sl->state, backedge); 1493 if (err) { 1494 bpf_free_verifier_state(&backedge->state, false); 1495 kfree(backedge); 1496 return err; 1497 } 1498 } 1499 return 1; 1500 } 1501 miss: 1502 /* when new state is not going to be added do not increase miss count. 1503 * Otherwise several loop iterations will remove the state 1504 * recorded earlier. The goal of these heuristics is to have 1505 * states from some iterations of the loop (some in the beginning 1506 * and some at the end) to help pruning. 1507 */ 1508 if (add_new_state) 1509 sl->miss_cnt++; 1510 /* heuristic to determine whether this state is beneficial 1511 * to keep checking from state equivalence point of view. 1512 * Higher numbers increase max_states_per_insn and verification time, 1513 * but do not meaningfully decrease insn_processed. 1514 * 'n' controls how many times state could miss before eviction. 1515 * Use bigger 'n' for checkpoints because evicting checkpoint states 1516 * too early would hinder iterator convergence. 1517 */ 1518 n = bpf_is_force_checkpoint(env, insn_idx) && sl->state.branches > 0 ? 64 : 3; 1519 if (sl->miss_cnt > sl->hit_cnt * n + n) { 1520 /* the state is unlikely to be useful. Remove it to 1521 * speed up verification 1522 */ 1523 sl->in_free_list = true; 1524 list_del(&sl->node); 1525 list_add(&sl->node, &env->free_list); 1526 env->free_list_size++; 1527 env->explored_states_size--; 1528 maybe_free_verifier_state(env, sl); 1529 } 1530 } 1531 1532 if (env->max_states_per_insn < states_cnt) 1533 env->max_states_per_insn = states_cnt; 1534 1535 if (!env->bpf_capable && states_cnt > BPF_COMPLEXITY_LIMIT_STATES) 1536 return 0; 1537 1538 if (!add_new_state) 1539 return 0; 1540 1541 /* There were no equivalent states, remember the current one. 1542 * Technically the current state is not proven to be safe yet, 1543 * but it will either reach outer most bpf_exit (which means it's safe) 1544 * or it will be rejected. When there are no loops the verifier won't be 1545 * seeing this tuple (frame[0].callsite, frame[1].callsite, .. insn_idx) 1546 * again on the way to bpf_exit. 1547 * When looping the sl->state.branches will be > 0 and this state 1548 * will not be considered for equivalence until branches == 0. 1549 */ 1550 new_sl = kzalloc_obj(struct bpf_verifier_state_list, GFP_KERNEL_ACCOUNT); 1551 if (!new_sl) 1552 return -ENOMEM; 1553 env->total_states++; 1554 env->explored_states_size++; 1555 update_peak_states(env); 1556 env->prev_jmps_processed = env->jmps_processed; 1557 env->prev_insn_processed = env->insn_processed; 1558 1559 /* forget precise markings we inherited, see __mark_chain_precision */ 1560 if (env->bpf_capable) 1561 mark_all_scalars_imprecise(env, cur); 1562 1563 bpf_clear_singular_ids(env, cur); 1564 1565 /* add new state to the head of linked list */ 1566 new = &new_sl->state; 1567 err = bpf_copy_verifier_state(new, cur); 1568 if (err) { 1569 bpf_free_verifier_state(new, false); 1570 kfree(new_sl); 1571 return err; 1572 } 1573 new->insn_idx = insn_idx; 1574 verifier_bug_if(new->branches != 1, env, 1575 "%s:branches_to_explore=%d insn %d", 1576 __func__, new->branches, insn_idx); 1577 err = maybe_enter_scc(env, new); 1578 if (err) { 1579 bpf_free_verifier_state(new, false); 1580 kfree(new_sl); 1581 return err; 1582 } 1583 1584 cur->parent = new; 1585 cur->first_insn_idx = insn_idx; 1586 cur->dfs_depth = new->dfs_depth + 1; 1587 bpf_clear_jmp_history(cur); 1588 list_add(&new_sl->node, head); 1589 return 0; 1590 } 1591