xref: /linux/kernel/bpf/states.c (revision 056e065a6b6e01ab54bb9770c0d5a15350e571e2)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */
3 #include <linux/bpf.h>
4 #include <linux/bpf_verifier.h>
5 #include <linux/cnum.h>
6 #include <linux/filter.h>
7 
8 #define verbose(env, fmt, args...) bpf_verifier_log_write(env, fmt, ##args)
9 
10 #define BPF_COMPLEXITY_LIMIT_STATES	64
11 
12 static bool is_may_goto_insn_at(struct bpf_verifier_env *env, int insn_idx)
13 {
14 	return bpf_is_may_goto_insn(&env->prog->insnsi[insn_idx]);
15 }
16 
17 static bool is_iter_next_insn(struct bpf_verifier_env *env, int insn_idx)
18 {
19 	return env->insn_aux_data[insn_idx].is_iter_next;
20 }
21 
22 static void update_peak_states(struct bpf_verifier_env *env)
23 {
24 	u32 cur_states;
25 
26 	cur_states = env->explored_states_size + env->free_list_size + env->num_backedges;
27 	env->peak_states = max(env->peak_states, cur_states);
28 }
29 
30 /* struct bpf_verifier_state->parent refers to states
31  * that are in either of env->{expored_states,free_list}.
32  * In both cases the state is contained in struct bpf_verifier_state_list.
33  */
34 static struct bpf_verifier_state_list *state_parent_as_list(struct bpf_verifier_state *st)
35 {
36 	if (st->parent)
37 		return container_of(st->parent, struct bpf_verifier_state_list, state);
38 	return NULL;
39 }
40 
41 static bool incomplete_read_marks(struct bpf_verifier_env *env,
42 				  struct bpf_verifier_state *st);
43 
44 /* A state can be freed if it is no longer referenced:
45  * - is in the env->free_list;
46  * - has no children states;
47  */
48 static void maybe_free_verifier_state(struct bpf_verifier_env *env,
49 				      struct bpf_verifier_state_list *sl)
50 {
51 	if (!sl->in_free_list
52 	    || sl->state.branches != 0
53 	    || incomplete_read_marks(env, &sl->state))
54 		return;
55 	list_del(&sl->node);
56 	bpf_free_verifier_state(&sl->state, false);
57 	kfree(sl);
58 	env->free_list_size--;
59 }
60 
61 /* For state @st look for a topmost frame with frame_insn_idx() in some SCC,
62  * if such frame exists form a corresponding @callchain as an array of
63  * call sites leading to this frame and SCC id.
64  * E.g.:
65  *
66  *    void foo()  { A: loop {... SCC#1 ...}; }
67  *    void bar()  { B: loop { C: foo(); ... SCC#2 ... }
68  *                  D: loop { E: foo(); ... SCC#3 ... } }
69  *    void main() { F: bar(); }
70  *
71  * @callchain at (A) would be either (F,SCC#2) or (F,SCC#3) depending
72  * on @st frame call sites being (F,C,A) or (F,E,A).
73  */
74 static bool compute_scc_callchain(struct bpf_verifier_env *env,
75 				  struct bpf_verifier_state *st,
76 				  struct bpf_scc_callchain *callchain)
77 {
78 	u32 i, scc, insn_idx;
79 
80 	memset(callchain, 0, sizeof(*callchain));
81 	for (i = 0; i <= st->curframe; i++) {
82 		insn_idx = bpf_frame_insn_idx(st, i);
83 		scc = env->insn_aux_data[insn_idx].scc;
84 		if (scc) {
85 			callchain->scc = scc;
86 			break;
87 		} else if (i < st->curframe) {
88 			callchain->callsites[i] = insn_idx;
89 		} else {
90 			return false;
91 		}
92 	}
93 	return true;
94 }
95 
96 /* Check if bpf_scc_visit instance for @callchain exists. */
97 static struct bpf_scc_visit *scc_visit_lookup(struct bpf_verifier_env *env,
98 					      struct bpf_scc_callchain *callchain)
99 {
100 	struct bpf_scc_info *info = env->scc_info[callchain->scc];
101 	struct bpf_scc_visit *visits = info->visits;
102 	u32 i;
103 
104 	if (!info)
105 		return NULL;
106 	for (i = 0; i < info->num_visits; i++)
107 		if (memcmp(callchain, &visits[i].callchain, sizeof(*callchain)) == 0)
108 			return &visits[i];
109 	return NULL;
110 }
111 
112 /* Allocate a new bpf_scc_visit instance corresponding to @callchain.
113  * Allocated instances are alive for a duration of the do_check_common()
114  * call and are freed by free_states().
115  */
116 static struct bpf_scc_visit *scc_visit_alloc(struct bpf_verifier_env *env,
117 					     struct bpf_scc_callchain *callchain)
118 {
119 	struct bpf_scc_visit *visit;
120 	struct bpf_scc_info *info;
121 	u32 scc, num_visits;
122 	u64 new_sz;
123 
124 	scc = callchain->scc;
125 	info = env->scc_info[scc];
126 	num_visits = info ? info->num_visits : 0;
127 	new_sz = sizeof(*info) + sizeof(struct bpf_scc_visit) * (num_visits + 1);
128 	info = kvrealloc(env->scc_info[scc], new_sz, GFP_KERNEL_ACCOUNT);
129 	if (!info)
130 		return NULL;
131 	env->scc_info[scc] = info;
132 	info->num_visits = num_visits + 1;
133 	visit = &info->visits[num_visits];
134 	memset(visit, 0, sizeof(*visit));
135 	memcpy(&visit->callchain, callchain, sizeof(*callchain));
136 	return visit;
137 }
138 
139 /* Form a string '(callsite#1,callsite#2,...,scc)' in env->tmp_str_buf */
140 static char *format_callchain(struct bpf_verifier_env *env, struct bpf_scc_callchain *callchain)
141 {
142 	char *buf = env->tmp_str_buf;
143 	int i, delta = 0;
144 
145 	delta += snprintf(buf + delta, TMP_STR_BUF_LEN - delta, "(");
146 	for (i = 0; i < ARRAY_SIZE(callchain->callsites); i++) {
147 		if (!callchain->callsites[i])
148 			break;
149 		delta += snprintf(buf + delta, TMP_STR_BUF_LEN - delta, "%u,",
150 				  callchain->callsites[i]);
151 	}
152 	delta += snprintf(buf + delta, TMP_STR_BUF_LEN - delta, "%u)", callchain->scc);
153 	return env->tmp_str_buf;
154 }
155 
156 /* If callchain for @st exists (@st is in some SCC), ensure that
157  * bpf_scc_visit instance for this callchain exists.
158  * If instance does not exist or is empty, assign visit->entry_state to @st.
159  */
160 static int maybe_enter_scc(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
161 {
162 	struct bpf_scc_callchain *callchain = &env->callchain_buf;
163 	struct bpf_scc_visit *visit;
164 
165 	if (!compute_scc_callchain(env, st, callchain))
166 		return 0;
167 	visit = scc_visit_lookup(env, callchain);
168 	visit = visit ?: scc_visit_alloc(env, callchain);
169 	if (!visit)
170 		return -ENOMEM;
171 	if (!visit->entry_state) {
172 		visit->entry_state = st;
173 		if (env->log.level & BPF_LOG_LEVEL2)
174 			verbose(env, "SCC enter %s\n", format_callchain(env, callchain));
175 	}
176 	return 0;
177 }
178 
179 static int propagate_backedges(struct bpf_verifier_env *env, struct bpf_scc_visit *visit);
180 
181 /* If callchain for @st exists (@st is in some SCC), make it empty:
182  * - set visit->entry_state to NULL;
183  * - flush accumulated backedges.
184  */
185 static int maybe_exit_scc(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
186 {
187 	struct bpf_scc_callchain *callchain = &env->callchain_buf;
188 	struct bpf_scc_visit *visit;
189 
190 	if (!compute_scc_callchain(env, st, callchain))
191 		return 0;
192 	visit = scc_visit_lookup(env, callchain);
193 	if (!visit) {
194 		/*
195 		 * If path traversal stops inside an SCC, corresponding bpf_scc_visit
196 		 * must exist for non-speculative paths. For non-speculative paths
197 		 * traversal stops when:
198 		 * a. Verification error is found, maybe_exit_scc() is not called.
199 		 * b. Top level BPF_EXIT is reached. Top level BPF_EXIT is not a member
200 		 *    of any SCC.
201 		 * c. A checkpoint is reached and matched. Checkpoints are created by
202 		 *    is_state_visited(), which calls maybe_enter_scc(), which allocates
203 		 *    bpf_scc_visit instances for checkpoints within SCCs.
204 		 * (c) is the only case that can reach this point.
205 		 */
206 		if (!st->speculative) {
207 			verifier_bug(env, "scc exit: no visit info for call chain %s",
208 				     format_callchain(env, callchain));
209 			return -EFAULT;
210 		}
211 		return 0;
212 	}
213 	if (visit->entry_state != st)
214 		return 0;
215 	if (env->log.level & BPF_LOG_LEVEL2)
216 		verbose(env, "SCC exit %s\n", format_callchain(env, callchain));
217 	visit->entry_state = NULL;
218 	env->num_backedges -= visit->num_backedges;
219 	visit->num_backedges = 0;
220 	update_peak_states(env);
221 	return propagate_backedges(env, visit);
222 }
223 
224 /* Lookup an bpf_scc_visit instance corresponding to @st callchain
225  * and add @backedge to visit->backedges. @st callchain must exist.
226  */
227 static int add_scc_backedge(struct bpf_verifier_env *env,
228 			    struct bpf_verifier_state *st,
229 			    struct bpf_scc_backedge *backedge)
230 {
231 	struct bpf_scc_callchain *callchain = &env->callchain_buf;
232 	struct bpf_scc_visit *visit;
233 
234 	if (!compute_scc_callchain(env, st, callchain)) {
235 		verifier_bug(env, "add backedge: no SCC in verification path, insn_idx %d",
236 			     st->insn_idx);
237 		return -EFAULT;
238 	}
239 	visit = scc_visit_lookup(env, callchain);
240 	if (!visit) {
241 		verifier_bug(env, "add backedge: no visit info for call chain %s",
242 			     format_callchain(env, callchain));
243 		return -EFAULT;
244 	}
245 	if (env->log.level & BPF_LOG_LEVEL2)
246 		verbose(env, "SCC backedge %s\n", format_callchain(env, callchain));
247 	backedge->next = visit->backedges;
248 	visit->backedges = backedge;
249 	visit->num_backedges++;
250 	env->num_backedges++;
251 	update_peak_states(env);
252 	return 0;
253 }
254 
255 /* bpf_reg_state->live marks for registers in a state @st are incomplete,
256  * if state @st is in some SCC and not all execution paths starting at this
257  * SCC are fully explored.
258  */
259 static bool incomplete_read_marks(struct bpf_verifier_env *env,
260 				  struct bpf_verifier_state *st)
261 {
262 	struct bpf_scc_callchain *callchain = &env->callchain_buf;
263 	struct bpf_scc_visit *visit;
264 
265 	if (!compute_scc_callchain(env, st, callchain))
266 		return false;
267 	visit = scc_visit_lookup(env, callchain);
268 	if (!visit)
269 		return false;
270 	return !!visit->backedges;
271 }
272 
273 int bpf_update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
274 {
275 	struct bpf_verifier_state_list *sl = NULL, *parent_sl;
276 	struct bpf_verifier_state *parent;
277 	int err;
278 
279 	while (st) {
280 		u32 br = --st->branches;
281 
282 		/* verifier_bug_if(br > 1, ...) technically makes sense here,
283 		 * but see comment in push_stack(), hence:
284 		 */
285 		verifier_bug_if((int)br < 0, env, "%s:branches_to_explore=%d", __func__, br);
286 		if (br)
287 			break;
288 		err = maybe_exit_scc(env, st);
289 		if (err)
290 			return err;
291 		parent = st->parent;
292 		parent_sl = state_parent_as_list(st);
293 		if (sl)
294 			maybe_free_verifier_state(env, sl);
295 		st = parent;
296 		sl = parent_sl;
297 	}
298 	return 0;
299 }
300 
301 /* check %cur's range satisfies %old's */
302 static bool range_within(const struct bpf_reg_state *old,
303 			 const struct bpf_reg_state *cur)
304 {
305 	return cnum64_is_subset(old->r64, cur->r64) &&
306 	       cnum32_is_subset(old->r32, cur->r32);
307 }
308 
309 /* If in the old state two registers had the same id, then they need to have
310  * the same id in the new state as well.  But that id could be different from
311  * the old state, so we need to track the mapping from old to new ids.
312  * Once we have seen that, say, a reg with old id 5 had new id 9, any subsequent
313  * regs with old id 5 must also have new id 9 for the new state to be safe.  But
314  * regs with a different old id could still have new id 9, we don't care about
315  * that.
316  * So we look through our idmap to see if this old id has been seen before.  If
317  * so, we require the new id to match; otherwise, we add the id pair to the map.
318  */
319 static bool check_ids(u32 old_id, u32 cur_id, struct bpf_idmap *idmap)
320 {
321 	struct bpf_id_pair *map = idmap->map;
322 	unsigned int i;
323 
324 	/* either both IDs should be set or both should be zero */
325 	if (!!old_id != !!cur_id)
326 		return false;
327 
328 	if (old_id == 0) /* cur_id == 0 as well */
329 		return true;
330 
331 	for (i = 0; i < idmap->cnt; i++) {
332 		if (map[i].old == old_id)
333 			return map[i].cur == cur_id;
334 		if (map[i].cur == cur_id)
335 			return false;
336 	}
337 
338 	/* Reached the end of known mappings; haven't seen this id before */
339 	if (idmap->cnt < BPF_ID_MAP_SIZE) {
340 		map[idmap->cnt].old = old_id;
341 		map[idmap->cnt].cur = cur_id;
342 		idmap->cnt++;
343 		return true;
344 	}
345 
346 	/*
347 	 * idmap slots are bounded by the number of registers and stack slots.
348 	 * Since referenced dynptrs acquire intermediate references that do
349 	 * not live in either, so the map can be exhausted. Since it is unlikely,
350 	 * fail the verification by treating the states as not equivalent.
351 	 */
352 	return false;
353 }
354 
355 /*
356  * Compare scalar register IDs for state equivalence.
357  *
358  * When old_id == 0, the old register is independent - not linked to any
359  * other register. Any linking in the current state only adds constraints,
360  * making it more restrictive. Since the old state didn't rely on any ID
361  * relationships for this register, it's always safe to accept cur regardless
362  * of its ID. Hence, return true immediately.
363  *
364  * When old_id != 0 but cur_id == 0, we need to ensure that different
365  * independent registers in cur don't incorrectly satisfy the ID matching
366  * requirements of linked registers in old.
367  *
368  * Example: if old has r6.id=X and r7.id=X (linked), but cur has r6.id=0
369  * and r7.id=0 (both independent), without temp IDs both would map old_id=X
370  * to cur_id=0 and pass. With temp IDs: r6 maps X->temp1, r7 tries to map
371  * X->temp2, but X is already mapped to temp1, so the check fails correctly.
372  *
373  * When old_id has BPF_ADD_CONST set, the compound id (base | flag) and the
374  * base id (flag stripped) must both map consistently. Example: old has
375  * r2.id=A, r3.id=A|flag (r3 = r2 + delta), cur has r2.id=B, r3.id=C|flag
376  * (r3 derived from unrelated r4). Without the base check, idmap gets two
377  * independent entries A->B and A|flag->C|flag, missing that A->C conflicts
378  * with A->B. The base ID cross-check catches this.
379  */
380 static bool check_scalar_ids(u32 old_id, u32 cur_id, struct bpf_idmap *idmap)
381 {
382 	if (!old_id)
383 		return true;
384 
385 	cur_id = cur_id ? cur_id : ++idmap->tmp_id_gen;
386 
387 	if (!check_ids(old_id, cur_id, idmap))
388 		return false;
389 	if (old_id & BPF_ADD_CONST) {
390 		old_id &= ~BPF_ADD_CONST;
391 		cur_id &= ~BPF_ADD_CONST;
392 		if (!check_ids(old_id, cur_id, idmap))
393 			return false;
394 	}
395 	return true;
396 }
397 
398 static void __clean_func_state(struct bpf_verifier_env *env,
399 			       struct bpf_func_state *st,
400 			       u16 live_regs, int frame)
401 {
402 	int i, j;
403 
404 	for (i = 0; i < BPF_REG_FP; i++) {
405 		/* liveness must not touch this register anymore */
406 		if (!(live_regs & BIT(i)))
407 			/* since the register is unused, clear its state
408 			 * to make further comparison simpler
409 			 */
410 			bpf_mark_reg_not_init(env, &st->regs[i]);
411 	}
412 
413 	/*
414 	 * Clean dead 4-byte halves within each SPI independently.
415 	 * half_spi 2*i   → lower half: slot_type[0..3] (closer to FP)
416 	 * half_spi 2*i+1 → upper half: slot_type[4..7] (farther from FP)
417 	 */
418 	for (i = 0; i < st->allocated_stack / BPF_REG_SIZE; i++) {
419 		bool lo_live = bpf_stack_slot_alive(env, frame, i * 2);
420 		bool hi_live = bpf_stack_slot_alive(env, frame, i * 2 + 1);
421 
422 		if (!hi_live || !lo_live) {
423 			int start = !lo_live ? 0 : BPF_REG_SIZE / 2;
424 			int end = !hi_live ? BPF_REG_SIZE : BPF_REG_SIZE / 2;
425 			u8 stype = st->stack[i].slot_type[7];
426 
427 			/*
428 			 * Don't clear special slots.
429 			 * destroy_if_dynptr_stack_slot() needs STACK_DYNPTR to
430 			 * detect overwrites and invalidate associated data slices.
431 			 * is_iter_reg_valid_uninit() and is_irq_flag_reg_valid_uninit()
432 			 * check for their respective slot types to detect double-create.
433 			 */
434 			if (stype == STACK_DYNPTR || stype == STACK_ITER ||
435 			    stype == STACK_IRQ_FLAG)
436 				continue;
437 
438 			/*
439 			 * Only destroy spilled_ptr when hi half is dead.
440 			 * If hi half is still live with STACK_SPILL, the
441 			 * spilled_ptr metadata is needed for correct state
442 			 * comparison in stacksafe().
443 			 * is_spilled_reg() is using slot_type[7], but
444 			 * is_spilled_scalar_after() check either slot_type[0] or [4]
445 			 */
446 			if (!hi_live) {
447 				struct bpf_reg_state *spill = &st->stack[i].spilled_ptr;
448 
449 				if (lo_live && stype == STACK_SPILL) {
450 					u8 val = STACK_MISC;
451 
452 					/*
453 					 * 8 byte spill of scalar 0 where half slot is dead
454 					 * should become STACK_ZERO in lo 4 bytes.
455 					 */
456 					if (bpf_register_is_null(spill))
457 						val = STACK_ZERO;
458 					for (j = 0; j < 4; j++) {
459 						u8 *t = &st->stack[i].slot_type[j];
460 
461 						if (*t == STACK_SPILL)
462 							*t = val;
463 					}
464 				}
465 				bpf_mark_reg_not_init(env, spill);
466 			}
467 			for (j = start; j < end; j++)
468 				st->stack[i].slot_type[j] = STACK_POISON;
469 		}
470 	}
471 }
472 
473 static int clean_verifier_state(struct bpf_verifier_env *env,
474 				 struct bpf_verifier_state *st)
475 {
476 	int i, err;
477 
478 	err = bpf_live_stack_query_init(env, st);
479 	if (err)
480 		return err;
481 	for (i = 0; i <= st->curframe; i++) {
482 		u32 ip = bpf_frame_insn_idx(st, i);
483 		u16 live_regs = env->insn_aux_data[ip].live_regs_before;
484 
485 		__clean_func_state(env, st->frame[i], live_regs, i);
486 	}
487 	return 0;
488 }
489 
490 static bool regs_exact(const struct bpf_reg_state *rold,
491 		       const struct bpf_reg_state *rcur,
492 		       struct bpf_idmap *idmap)
493 {
494 	return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
495 	       check_ids(rold->id, rcur->id, idmap) &&
496 	       check_ids(rold->parent_id, rcur->parent_id, idmap);
497 }
498 
499 enum exact_level {
500 	NOT_EXACT,
501 	EXACT,
502 	RANGE_WITHIN
503 };
504 
505 /* Returns true if (rold safe implies rcur safe) */
506 static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
507 		    struct bpf_reg_state *rcur, struct bpf_idmap *idmap,
508 		    enum exact_level exact)
509 {
510 	if (exact == EXACT)
511 		return regs_exact(rold, rcur, idmap);
512 
513 	if (rold->type == NOT_INIT)
514 		/* explored state can't have used this */
515 		return true;
516 
517 	/* Enforce that register types have to match exactly, including their
518 	 * modifiers (like PTR_MAYBE_NULL, MEM_RDONLY, etc), as a general
519 	 * rule.
520 	 *
521 	 * One can make a point that using a pointer register as unbounded
522 	 * SCALAR would be technically acceptable, but this could lead to
523 	 * pointer leaks because scalars are allowed to leak while pointers
524 	 * are not. We could make this safe in special cases if root is
525 	 * calling us, but it's probably not worth the hassle.
526 	 *
527 	 * Also, register types that are *not* MAYBE_NULL could technically be
528 	 * safe to use as their MAYBE_NULL variants (e.g., PTR_TO_MAP_VALUE
529 	 * is safe to be used as PTR_TO_MAP_VALUE_OR_NULL, provided both point
530 	 * to the same map).
531 	 * However, if the old MAYBE_NULL register then got NULL checked,
532 	 * doing so could have affected others with the same id, and we can't
533 	 * check for that because we lost the id when we converted to
534 	 * a non-MAYBE_NULL variant.
535 	 * So, as a general rule we don't allow mixing MAYBE_NULL and
536 	 * non-MAYBE_NULL registers as well.
537 	 */
538 	if (rold->type != rcur->type)
539 		return false;
540 
541 	switch (base_type(rold->type)) {
542 	case SCALAR_VALUE:
543 		if (env->explore_alu_limits) {
544 			/* explore_alu_limits disables tnum_in() and range_within()
545 			 * logic and requires everything to be strict
546 			 */
547 			return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
548 			       check_scalar_ids(rold->id, rcur->id, idmap);
549 		}
550 		if (!rold->precise && exact == NOT_EXACT)
551 			return true;
552 		/*
553 		 * Linked register tracking uses rold->id to detect relationships.
554 		 * When rold->id == 0, the register is independent and any linking
555 		 * in rcur only adds constraints. When rold->id != 0, we must verify
556 		 * id mapping and (for BPF_ADD_CONST) offset consistency.
557 		 *
558 		 * +------------------+-----------+------------------+---------------+
559 		 * |                  | rold->id  | rold + ADD_CONST | rold->id == 0 |
560 		 * |------------------+-----------+------------------+---------------|
561 		 * | rcur->id         | range,ids | false            | range         |
562 		 * | rcur + ADD_CONST | false     | range,ids,off    | range         |
563 		 * | rcur->id == 0    | range,ids | false            | range         |
564 		 * +------------------+-----------+------------------+---------------+
565 		 *
566 		 * Why check_ids() for scalar registers?
567 		 *
568 		 * Consider the following BPF code:
569 		 *   1: r6 = ... unbound scalar, ID=a ...
570 		 *   2: r7 = ... unbound scalar, ID=b ...
571 		 *   3: if (r6 > r7) goto +1
572 		 *   4: r6 = r7
573 		 *   5: if (r6 > X) goto ...
574 		 *   6: ... memory operation using r7 ...
575 		 *
576 		 * First verification path is [1-6]:
577 		 * - at (4) same bpf_reg_state::id (b) would be assigned to r6 and r7;
578 		 * - at (5) r6 would be marked <= X, sync_linked_regs() would also mark
579 		 *   r7 <= X, because r6 and r7 share same id.
580 		 * Next verification path is [1-4, 6].
581 		 *
582 		 * Instruction (6) would be reached in two states:
583 		 *   I.  r6{.id=b}, r7{.id=b} via path 1-6;
584 		 *   II. r6{.id=a}, r7{.id=b} via path 1-4, 6.
585 		 *
586 		 * Use check_ids() to distinguish these states.
587 		 * ---
588 		 * Also verify that new value satisfies old value range knowledge.
589 		 */
590 
591 		/*
592 		 * ADD_CONST flags must match exactly: BPF_ADD_CONST32 and
593 		 * BPF_ADD_CONST64 have different linking semantics in
594 		 * sync_linked_regs() (alu32 zero-extends, alu64 does not),
595 		 * so pruning across different flag types is unsafe.
596 		 */
597 		if (rold->id &&
598 		    (rold->id & BPF_ADD_CONST) != (rcur->id & BPF_ADD_CONST))
599 			return false;
600 
601 		/* Both have offset linkage: offsets must match */
602 		if ((rold->id & BPF_ADD_CONST) && rold->delta != rcur->delta)
603 			return false;
604 
605 		if (!check_scalar_ids(rold->id, rcur->id, idmap))
606 			return false;
607 
608 		return range_within(rold, rcur) && tnum_in(rold->var_off, rcur->var_off);
609 	case PTR_TO_MAP_KEY:
610 	case PTR_TO_MAP_VALUE:
611 	case PTR_TO_MEM:
612 	case PTR_TO_BUF:
613 	case PTR_TO_TP_BUFFER:
614 		/* If the new min/max/var_off satisfy the old ones and
615 		 * everything else matches, we are OK.
616 		 */
617 		return memcmp(rold, rcur, offsetof(struct bpf_reg_state, var_off)) == 0 &&
618 		       range_within(rold, rcur) &&
619 		       tnum_in(rold->var_off, rcur->var_off) &&
620 		       check_ids(rold->id, rcur->id, idmap) &&
621 		       check_ids(rold->parent_id, rcur->parent_id, idmap);
622 	case PTR_TO_PACKET_META:
623 	case PTR_TO_PACKET:
624 		/* We must have at least as much range as the old ptr
625 		 * did, so that any accesses which were safe before are
626 		 * still safe.  This is true even if old range < old off,
627 		 * since someone could have accessed through (ptr - k), or
628 		 * even done ptr -= k in a register, to get a safe access.
629 		 */
630 		if (rold->range < 0 || rcur->range < 0) {
631 			/* special case for [BEYOND|AT]_PKT_END */
632 			if (rold->range != rcur->range)
633 				return false;
634 		} else if (rold->range > rcur->range) {
635 			return false;
636 		}
637 		/* id relations must be preserved */
638 		if (!check_ids(rold->id, rcur->id, idmap))
639 			return false;
640 		/* new val must satisfy old val knowledge */
641 		return range_within(rold, rcur) &&
642 		       tnum_in(rold->var_off, rcur->var_off);
643 	case PTR_TO_STACK:
644 		/* two stack pointers are equal only if they're pointing to
645 		 * the same stack frame, since fp-8 in foo != fp-8 in bar
646 		 */
647 		return regs_exact(rold, rcur, idmap) && rold->frameno == rcur->frameno;
648 	case PTR_TO_ARENA:
649 		return true;
650 	case PTR_TO_INSN:
651 		return memcmp(rold, rcur, offsetof(struct bpf_reg_state, var_off)) == 0 &&
652 		       range_within(rold, rcur) && tnum_in(rold->var_off, rcur->var_off);
653 	default:
654 		return regs_exact(rold, rcur, idmap);
655 	}
656 }
657 
658 static struct bpf_reg_state unbound_reg;
659 
660 static __init int unbound_reg_init(void)
661 {
662 	bpf_mark_reg_unknown_imprecise(&unbound_reg);
663 	return 0;
664 }
665 late_initcall(unbound_reg_init);
666 
667 static bool is_spilled_scalar_after(const struct bpf_stack_state *stack, int im)
668 {
669 	return stack->slot_type[im] == STACK_SPILL &&
670 	       stack->spilled_ptr.type == SCALAR_VALUE;
671 }
672 
673 static bool is_stack_misc_after(struct bpf_verifier_env *env,
674 				struct bpf_stack_state *stack, int im)
675 {
676 	u32 i;
677 
678 	for (i = im; i < ARRAY_SIZE(stack->slot_type); ++i) {
679 		if ((stack->slot_type[i] == STACK_MISC) ||
680 		    ((stack->slot_type[i] == STACK_INVALID || stack->slot_type[i] == STACK_POISON) &&
681 		     env->allow_uninit_stack))
682 			continue;
683 		return false;
684 	}
685 
686 	return true;
687 }
688 
689 static struct bpf_reg_state *scalar_reg_for_stack(struct bpf_verifier_env *env,
690 						  struct bpf_stack_state *stack, int im)
691 {
692 	if (is_spilled_scalar_after(stack, im))
693 		return &stack->spilled_ptr;
694 
695 	if (is_stack_misc_after(env, stack, im))
696 		return &unbound_reg;
697 
698 	return NULL;
699 }
700 
701 static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
702 		      struct bpf_func_state *cur, struct bpf_idmap *idmap,
703 		      enum exact_level exact)
704 {
705 	int i, spi;
706 
707 	/* walk slots of the explored stack and ignore any additional
708 	 * slots in the current stack, since explored(safe) state
709 	 * didn't use them
710 	 */
711 	for (i = 0; i < old->allocated_stack; i++) {
712 		struct bpf_reg_state *old_reg, *cur_reg;
713 		int im = i % BPF_REG_SIZE;
714 
715 		spi = i / BPF_REG_SIZE;
716 
717 		if (exact == EXACT) {
718 			u8 old_type = old->stack[spi].slot_type[i % BPF_REG_SIZE];
719 			u8 cur_type = i < cur->allocated_stack ?
720 				      cur->stack[spi].slot_type[i % BPF_REG_SIZE] : STACK_INVALID;
721 
722 			/* STACK_INVALID and STACK_POISON are equivalent for pruning */
723 			if (old_type == STACK_POISON)
724 				old_type = STACK_INVALID;
725 			if (cur_type == STACK_POISON)
726 				cur_type = STACK_INVALID;
727 			if (i >= cur->allocated_stack || old_type != cur_type)
728 				return false;
729 		}
730 
731 		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID ||
732 		    old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_POISON)
733 			continue;
734 
735 		if (env->allow_uninit_stack &&
736 		    old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC)
737 			continue;
738 
739 		/* explored stack has more populated slots than current stack
740 		 * and these slots were used
741 		 */
742 		if (i >= cur->allocated_stack)
743 			return false;
744 
745 		/*
746 		 * 64 and 32-bit scalar spills vs MISC/INVALID slots and vice versa.
747 		 * Load from MISC/INVALID slots produces unbound scalar.
748 		 * Construct a fake register for such stack and call
749 		 * regsafe() to ensure scalar ids are compared.
750 		 */
751 		if (im == 0 || im == 4) {
752 			old_reg = scalar_reg_for_stack(env, &old->stack[spi], im);
753 			cur_reg = scalar_reg_for_stack(env, &cur->stack[spi], im);
754 			if (old_reg && cur_reg) {
755 				if (!regsafe(env, old_reg, cur_reg, idmap, exact))
756 					return false;
757 				i += (im == 0 ? BPF_REG_SIZE - 1 : 3);
758 				continue;
759 			}
760 		}
761 
762 		/* if old state was safe with misc data in the stack
763 		 * it will be safe with zero-initialized stack.
764 		 * The opposite is not true
765 		 */
766 		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC &&
767 		    cur->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_ZERO)
768 			continue;
769 		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
770 		    cur->stack[spi].slot_type[i % BPF_REG_SIZE])
771 			/* Ex: old explored (safe) state has STACK_SPILL in
772 			 * this stack slot, but current has STACK_MISC ->
773 			 * this verifier states are not equivalent,
774 			 * return false to continue verification of this path
775 			 */
776 			return false;
777 		if (i % BPF_REG_SIZE != BPF_REG_SIZE - 1)
778 			continue;
779 		/* Both old and cur are having same slot_type */
780 		switch (old->stack[spi].slot_type[BPF_REG_SIZE - 1]) {
781 		case STACK_SPILL:
782 			/* when explored and current stack slot are both storing
783 			 * spilled registers, check that stored pointers types
784 			 * are the same as well.
785 			 * Ex: explored safe path could have stored
786 			 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8}
787 			 * but current path has stored:
788 			 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16}
789 			 * such verifier states are not equivalent.
790 			 * return false to continue verification of this path
791 			 */
792 			if (!regsafe(env, &old->stack[spi].spilled_ptr,
793 				     &cur->stack[spi].spilled_ptr, idmap, exact))
794 				return false;
795 			break;
796 		case STACK_DYNPTR:
797 			old_reg = &old->stack[spi].spilled_ptr;
798 			cur_reg = &cur->stack[spi].spilled_ptr;
799 			if (old_reg->dynptr.type != cur_reg->dynptr.type ||
800 			    old_reg->dynptr.first_slot != cur_reg->dynptr.first_slot ||
801 			    !check_ids(old_reg->id, cur_reg->id, idmap) ||
802 			    !check_ids(old_reg->parent_id, cur_reg->parent_id, idmap))
803 				return false;
804 			break;
805 		case STACK_ITER:
806 			old_reg = &old->stack[spi].spilled_ptr;
807 			cur_reg = &cur->stack[spi].spilled_ptr;
808 			/* iter.depth is not compared between states as it
809 			 * doesn't matter for correctness and would otherwise
810 			 * prevent convergence; we maintain it only to prevent
811 			 * infinite loop check triggering, see
812 			 * iter_active_depths_differ()
813 			 */
814 			if (old_reg->iter.btf != cur_reg->iter.btf ||
815 			    old_reg->iter.btf_id != cur_reg->iter.btf_id ||
816 			    old_reg->iter.state != cur_reg->iter.state ||
817 			    /* ignore {old_reg,cur_reg}->iter.depth, see above */
818 			    !check_ids(old_reg->id, cur_reg->id, idmap))
819 				return false;
820 			break;
821 		case STACK_IRQ_FLAG:
822 			old_reg = &old->stack[spi].spilled_ptr;
823 			cur_reg = &cur->stack[spi].spilled_ptr;
824 			if (!check_ids(old_reg->id, cur_reg->id, idmap) ||
825 			    old_reg->irq.kfunc_class != cur_reg->irq.kfunc_class)
826 				return false;
827 			break;
828 		case STACK_MISC:
829 		case STACK_ZERO:
830 		case STACK_INVALID:
831 		case STACK_POISON:
832 			continue;
833 		/* Ensure that new unhandled slot types return false by default */
834 		default:
835 			return false;
836 		}
837 	}
838 	return true;
839 }
840 
841 /*
842  * Compare stack arg slots between old and current states.
843  * Outgoing stack args are path-local state and must agree for pruning.
844  */
845 static bool stack_arg_safe(struct bpf_verifier_env *env, struct bpf_func_state *old,
846 			   struct bpf_func_state *cur, struct bpf_idmap *idmap,
847 			   enum exact_level exact)
848 {
849 	int i, nslots;
850 
851 	nslots = max(old->out_stack_arg_cnt, cur->out_stack_arg_cnt);
852 	for (i = 0; i < nslots; i++) {
853 		struct bpf_reg_state *old_arg, *cur_arg;
854 		struct bpf_reg_state not_init = { .type = NOT_INIT };
855 
856 		old_arg = i < old->out_stack_arg_cnt ?
857 			  &old->stack_arg_regs[i] : &not_init;
858 		cur_arg = i < cur->out_stack_arg_cnt ?
859 			  &cur->stack_arg_regs[i] : &not_init;
860 		if (!regsafe(env, old_arg, cur_arg, idmap, exact))
861 			return false;
862 	}
863 
864 	return true;
865 }
866 
867 static bool refsafe(struct bpf_verifier_state *old, struct bpf_verifier_state *cur,
868 		    struct bpf_idmap *idmap)
869 {
870 	int i;
871 
872 	if (old->acquired_refs != cur->acquired_refs)
873 		return false;
874 
875 	if (old->active_locks != cur->active_locks)
876 		return false;
877 
878 	if (old->active_preempt_locks != cur->active_preempt_locks)
879 		return false;
880 
881 	if (old->active_rcu_locks != cur->active_rcu_locks)
882 		return false;
883 
884 	if (!check_ids(old->active_irq_id, cur->active_irq_id, idmap))
885 		return false;
886 
887 	if (!check_ids(old->active_lock_id, cur->active_lock_id, idmap) ||
888 	    old->active_lock_ptr != cur->active_lock_ptr)
889 		return false;
890 
891 	for (i = 0; i < old->acquired_refs; i++) {
892 		if (!check_ids(old->refs[i].id, cur->refs[i].id, idmap) ||
893 		    old->refs[i].type != cur->refs[i].type)
894 			return false;
895 		switch (old->refs[i].type) {
896 		case REF_TYPE_PTR:
897 			if (!check_ids(old->refs[i].parent_id, cur->refs[i].parent_id, idmap))
898 				return false;
899 			break;
900 		case REF_TYPE_IRQ:
901 			break;
902 		case REF_TYPE_LOCK:
903 		case REF_TYPE_RES_LOCK:
904 		case REF_TYPE_RES_LOCK_IRQ:
905 			if (old->refs[i].ptr != cur->refs[i].ptr)
906 				return false;
907 			break;
908 		default:
909 			WARN_ONCE(1, "Unhandled enum type for reference state: %d\n", old->refs[i].type);
910 			return false;
911 		}
912 	}
913 
914 	return true;
915 }
916 
917 /* compare two verifier states
918  *
919  * all states stored in state_list are known to be valid, since
920  * verifier reached 'bpf_exit' instruction through them
921  *
922  * this function is called when verifier exploring different branches of
923  * execution popped from the state stack. If it sees an old state that has
924  * more strict register state and more strict stack state then this execution
925  * branch doesn't need to be explored further, since verifier already
926  * concluded that more strict state leads to valid finish.
927  *
928  * Therefore two states are equivalent if register state is more conservative
929  * and explored stack state is more conservative than the current one.
930  * Example:
931  *       explored                   current
932  * (slot1=INV slot2=MISC) == (slot1=MISC slot2=MISC)
933  * (slot1=MISC slot2=MISC) != (slot1=INV slot2=MISC)
934  *
935  * In other words if current stack state (one being explored) has more
936  * valid slots than old one that already passed validation, it means
937  * the verifier can stop exploring and conclude that current state is valid too
938  *
939  * Similarly with registers. If explored state has register type as invalid
940  * whereas register type in current state is meaningful, it means that
941  * the current state will reach 'bpf_exit' instruction safely
942  */
943 static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_state *old,
944 			      struct bpf_func_state *cur, u32 insn_idx, enum exact_level exact)
945 {
946 	u16 live_regs = env->insn_aux_data[insn_idx].live_regs_before;
947 	u16 i;
948 
949 	if (old->callback_depth > cur->callback_depth)
950 		return false;
951 
952 	if (!old->no_stack_arg_load && cur->no_stack_arg_load)
953 		return false;
954 
955 	for (i = 0; i < MAX_BPF_REG; i++)
956 		if (((1 << i) & live_regs) &&
957 		    !regsafe(env, &old->regs[i], &cur->regs[i],
958 			     &env->idmap_scratch, exact))
959 			return false;
960 
961 	if (!stacksafe(env, old, cur, &env->idmap_scratch, exact))
962 		return false;
963 
964 	if (!stack_arg_safe(env, old, cur, &env->idmap_scratch, exact))
965 		return false;
966 
967 	return true;
968 }
969 
970 static void reset_idmap_scratch(struct bpf_verifier_env *env)
971 {
972 	struct bpf_idmap *idmap = &env->idmap_scratch;
973 
974 	idmap->tmp_id_gen = env->id_gen;
975 	idmap->cnt = 0;
976 }
977 
978 static bool states_equal(struct bpf_verifier_env *env,
979 			 struct bpf_verifier_state *old,
980 			 struct bpf_verifier_state *cur,
981 			 enum exact_level exact)
982 {
983 	u32 insn_idx;
984 	int i;
985 
986 	if (old->curframe != cur->curframe)
987 		return false;
988 
989 	reset_idmap_scratch(env);
990 
991 	/* Verification state from speculative execution simulation
992 	 * must never prune a non-speculative execution one.
993 	 */
994 	if (old->speculative && !cur->speculative)
995 		return false;
996 
997 	if (old->in_sleepable != cur->in_sleepable)
998 		return false;
999 
1000 	if (!refsafe(old, cur, &env->idmap_scratch))
1001 		return false;
1002 
1003 	/* for states to be equal callsites have to be the same
1004 	 * and all frame states need to be equivalent
1005 	 */
1006 	for (i = 0; i <= old->curframe; i++) {
1007 		insn_idx = bpf_frame_insn_idx(old, i);
1008 		if (old->frame[i]->callsite != cur->frame[i]->callsite)
1009 			return false;
1010 		if (!func_states_equal(env, old->frame[i], cur->frame[i], insn_idx, exact))
1011 			return false;
1012 	}
1013 	return true;
1014 }
1015 
1016 /* find precise scalars in the previous equivalent state and
1017  * propagate them into the current state
1018  */
1019 static int propagate_precision(struct bpf_verifier_env *env,
1020 			       const struct bpf_verifier_state *old,
1021 			       struct bpf_verifier_state *cur,
1022 			       bool *changed)
1023 {
1024 	struct bpf_reg_state *state_reg;
1025 	struct bpf_func_state *state;
1026 	int i, err = 0, fr;
1027 	bool first;
1028 
1029 	for (fr = old->curframe; fr >= 0; fr--) {
1030 		state = old->frame[fr];
1031 		state_reg = state->regs;
1032 		first = true;
1033 		for (i = 0; i < BPF_REG_FP; i++, state_reg++) {
1034 			if (state_reg->type != SCALAR_VALUE ||
1035 			    !state_reg->precise)
1036 				continue;
1037 			if (env->log.level & BPF_LOG_LEVEL2) {
1038 				if (first)
1039 					verbose(env, "frame %d: propagating r%d", fr, i);
1040 				else
1041 					verbose(env, ",r%d", i);
1042 			}
1043 			bpf_bt_set_frame_reg(&env->bt, fr, i);
1044 			first = false;
1045 		}
1046 
1047 		for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
1048 			if (!bpf_is_spilled_reg(&state->stack[i]))
1049 				continue;
1050 			state_reg = &state->stack[i].spilled_ptr;
1051 			if (state_reg->type != SCALAR_VALUE ||
1052 			    !state_reg->precise)
1053 				continue;
1054 			if (env->log.level & BPF_LOG_LEVEL2) {
1055 				if (first)
1056 					verbose(env, "frame %d: propagating fp%d",
1057 						fr, (-i - 1) * BPF_REG_SIZE);
1058 				else
1059 					verbose(env, ",fp%d", (-i - 1) * BPF_REG_SIZE);
1060 			}
1061 			bpf_bt_set_frame_slot(&env->bt, fr, i);
1062 			first = false;
1063 		}
1064 		if (!first && (env->log.level & BPF_LOG_LEVEL2))
1065 			verbose(env, "\n");
1066 	}
1067 
1068 	err = bpf_mark_chain_precision(env, cur, -1, changed);
1069 	if (err < 0)
1070 		return err;
1071 
1072 	return 0;
1073 }
1074 
1075 #define MAX_BACKEDGE_ITERS 64
1076 
1077 /* Propagate read and precision marks from visit->backedges[*].state->equal_state
1078  * to corresponding parent states of visit->backedges[*].state until fixed point is reached,
1079  * then free visit->backedges.
1080  * After execution of this function incomplete_read_marks() will return false
1081  * for all states corresponding to @visit->callchain.
1082  */
1083 static int propagate_backedges(struct bpf_verifier_env *env, struct bpf_scc_visit *visit)
1084 {
1085 	struct bpf_scc_backedge *backedge;
1086 	struct bpf_verifier_state *st;
1087 	bool changed;
1088 	int i, err;
1089 
1090 	i = 0;
1091 	do {
1092 		if (i++ > MAX_BACKEDGE_ITERS) {
1093 			if (env->log.level & BPF_LOG_LEVEL2)
1094 				verbose(env, "%s: too many iterations\n", __func__);
1095 			for (backedge = visit->backedges; backedge; backedge = backedge->next)
1096 				bpf_mark_all_scalars_precise(env, &backedge->state);
1097 			break;
1098 		}
1099 		changed = false;
1100 		for (backedge = visit->backedges; backedge; backedge = backedge->next) {
1101 			st = &backedge->state;
1102 			err = propagate_precision(env, st->equal_state, st, &changed);
1103 			if (err)
1104 				return err;
1105 		}
1106 	} while (changed);
1107 
1108 	bpf_free_backedges(visit);
1109 	return 0;
1110 }
1111 
1112 static bool states_maybe_looping(struct bpf_verifier_state *old,
1113 				 struct bpf_verifier_state *cur)
1114 {
1115 	struct bpf_func_state *fold, *fcur;
1116 	int i, fr = cur->curframe;
1117 
1118 	if (old->curframe != fr)
1119 		return false;
1120 
1121 	fold = old->frame[fr];
1122 	fcur = cur->frame[fr];
1123 	for (i = 0; i < MAX_BPF_REG; i++)
1124 		if (memcmp(&fold->regs[i], &fcur->regs[i],
1125 			   offsetof(struct bpf_reg_state, frameno)))
1126 			return false;
1127 	return true;
1128 }
1129 
1130 /* is_state_visited() handles iter_next() (see process_iter_next_call() for
1131  * terminology) calls specially: as opposed to bounded BPF loops, it *expects*
1132  * states to match, which otherwise would look like an infinite loop. So while
1133  * iter_next() calls are taken care of, we still need to be careful and
1134  * prevent erroneous and too eager declaration of "infinite loop", when
1135  * iterators are involved.
1136  *
1137  * Here's a situation in pseudo-BPF assembly form:
1138  *
1139  *   0: again:                          ; set up iter_next() call args
1140  *   1:   r1 = &it                      ; <CHECKPOINT HERE>
1141  *   2:   call bpf_iter_num_next        ; this is iter_next() call
1142  *   3:   if r0 == 0 goto done
1143  *   4:   ... something useful here ...
1144  *   5:   goto again                    ; another iteration
1145  *   6: done:
1146  *   7:   r1 = &it
1147  *   8:   call bpf_iter_num_destroy     ; clean up iter state
1148  *   9:   exit
1149  *
1150  * This is a typical loop. Let's assume that we have a prune point at 1:,
1151  * before we get to `call bpf_iter_num_next` (e.g., because of that `goto
1152  * again`, assuming other heuristics don't get in a way).
1153  *
1154  * When we first time come to 1:, let's say we have some state X. We proceed
1155  * to 2:, fork states, enqueue ACTIVE, validate NULL case successfully, exit.
1156  * Now we come back to validate that forked ACTIVE state. We proceed through
1157  * 3-5, come to goto, jump to 1:. Let's assume our state didn't change, so we
1158  * are converging. But the problem is that we don't know that yet, as this
1159  * convergence has to happen at iter_next() call site only. So if nothing is
1160  * done, at 1: verifier will use bounded loop logic and declare infinite
1161  * looping (and would be *technically* correct, if not for iterator's
1162  * "eventual sticky NULL" contract, see process_iter_next_call()). But we
1163  * don't want that. So what we do in process_iter_next_call() when we go on
1164  * another ACTIVE iteration, we bump slot->iter.depth, to mark that it's
1165  * a different iteration. So when we suspect an infinite loop, we additionally
1166  * check if any of the *ACTIVE* iterator states depths differ. If yes, we
1167  * pretend we are not looping and wait for next iter_next() call.
1168  *
1169  * This only applies to ACTIVE state. In DRAINED state we don't expect to
1170  * loop, because that would actually mean infinite loop, as DRAINED state is
1171  * "sticky", and so we'll keep returning into the same instruction with the
1172  * same state (at least in one of possible code paths).
1173  *
1174  * This approach allows to keep infinite loop heuristic even in the face of
1175  * active iterator. E.g., C snippet below is and will be detected as
1176  * infinitely looping:
1177  *
1178  *   struct bpf_iter_num it;
1179  *   int *p, x;
1180  *
1181  *   bpf_iter_num_new(&it, 0, 10);
1182  *   while ((p = bpf_iter_num_next(&t))) {
1183  *       x = p;
1184  *       while (x--) {} // <<-- infinite loop here
1185  *   }
1186  *
1187  */
1188 static bool iter_active_depths_differ(struct bpf_verifier_state *old, struct bpf_verifier_state *cur)
1189 {
1190 	struct bpf_reg_state *slot, *cur_slot;
1191 	struct bpf_func_state *state;
1192 	int i, fr;
1193 
1194 	for (fr = old->curframe; fr >= 0; fr--) {
1195 		state = old->frame[fr];
1196 		for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
1197 			if (state->stack[i].slot_type[0] != STACK_ITER)
1198 				continue;
1199 
1200 			slot = &state->stack[i].spilled_ptr;
1201 			if (slot->iter.state != BPF_ITER_STATE_ACTIVE)
1202 				continue;
1203 
1204 			cur_slot = &cur->frame[fr]->stack[i].spilled_ptr;
1205 			if (cur_slot->iter.depth != slot->iter.depth)
1206 				return true;
1207 		}
1208 	}
1209 	return false;
1210 }
1211 
1212 static void mark_all_scalars_imprecise(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
1213 {
1214 	struct bpf_func_state *func;
1215 	struct bpf_reg_state *reg;
1216 	int i, j;
1217 
1218 	for (i = 0; i <= st->curframe; i++) {
1219 		func = st->frame[i];
1220 		for (j = 0; j < BPF_REG_FP; j++) {
1221 			reg = &func->regs[j];
1222 			if (reg->type != SCALAR_VALUE)
1223 				continue;
1224 			reg->precise = false;
1225 		}
1226 		for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
1227 			if (!bpf_is_spilled_reg(&func->stack[j]))
1228 				continue;
1229 			reg = &func->stack[j].spilled_ptr;
1230 			if (reg->type != SCALAR_VALUE)
1231 				continue;
1232 			reg->precise = false;
1233 		}
1234 	}
1235 }
1236 
1237 int bpf_is_state_visited(struct bpf_verifier_env *env, int insn_idx)
1238 {
1239 	struct bpf_verifier_state_list *new_sl;
1240 	struct bpf_verifier_state_list *sl;
1241 	struct bpf_verifier_state *cur = env->cur_state, *new;
1242 	bool force_new_state, add_new_state, loop;
1243 	int n, err, states_cnt = 0;
1244 	struct list_head *pos, *tmp, *head;
1245 
1246 	force_new_state = env->test_state_freq || bpf_is_force_checkpoint(env, insn_idx) ||
1247 			  /* Avoid accumulating infinitely long jmp history */
1248 			  cur->jmp_history_cnt > 40;
1249 
1250 	/* bpf progs typically have pruning point every 4 instructions
1251 	 * http://vger.kernel.org/bpfconf2019.html#session-1
1252 	 * Do not add new state for future pruning if the verifier hasn't seen
1253 	 * at least 2 jumps and at least 8 instructions.
1254 	 * This heuristics helps decrease 'total_states' and 'peak_states' metric.
1255 	 * In tests that amounts to up to 50% reduction into total verifier
1256 	 * memory consumption and 20% verifier time speedup.
1257 	 */
1258 	add_new_state = force_new_state;
1259 	if (env->jmps_processed - env->prev_jmps_processed >= 2 &&
1260 	    env->insn_processed - env->prev_insn_processed >= 8)
1261 		add_new_state = true;
1262 
1263 	/* keep cleaning the current state as registers/stack become dead */
1264 	err = clean_verifier_state(env, cur);
1265 	if (err)
1266 		return err;
1267 
1268 	loop = false;
1269 	head = bpf_explored_state(env, insn_idx);
1270 	list_for_each_safe(pos, tmp, head) {
1271 		sl = container_of(pos, struct bpf_verifier_state_list, node);
1272 		states_cnt++;
1273 		if (sl->state.insn_idx != insn_idx)
1274 			continue;
1275 
1276 		if (sl->state.branches) {
1277 			struct bpf_func_state *frame = sl->state.frame[sl->state.curframe];
1278 
1279 			if (frame->in_async_callback_fn &&
1280 			    frame->async_entry_cnt != cur->frame[cur->curframe]->async_entry_cnt) {
1281 				/* Different async_entry_cnt means that the verifier is
1282 				 * processing another entry into async callback.
1283 				 * Seeing the same state is not an indication of infinite
1284 				 * loop or infinite recursion.
1285 				 * But finding the same state doesn't mean that it's safe
1286 				 * to stop processing the current state. The previous state
1287 				 * hasn't yet reached bpf_exit, since state.branches > 0.
1288 				 * Checking in_async_callback_fn alone is not enough either.
1289 				 * Since the verifier still needs to catch infinite loops
1290 				 * inside async callbacks.
1291 				 */
1292 				goto skip_inf_loop_check;
1293 			}
1294 			/* BPF open-coded iterators loop detection is special.
1295 			 * states_maybe_looping() logic is too simplistic in detecting
1296 			 * states that *might* be equivalent, because it doesn't know
1297 			 * about ID remapping, so don't even perform it.
1298 			 * See process_iter_next_call() and iter_active_depths_differ()
1299 			 * for overview of the logic. When current and one of parent
1300 			 * states are detected as equivalent, it's a good thing: we prove
1301 			 * convergence and can stop simulating further iterations.
1302 			 * It's safe to assume that iterator loop will finish, taking into
1303 			 * account iter_next() contract of eventually returning
1304 			 * sticky NULL result.
1305 			 *
1306 			 * Note, that states have to be compared exactly in this case because
1307 			 * read and precision marks might not be finalized inside the loop.
1308 			 * E.g. as in the program below:
1309 			 *
1310 			 *     1. r7 = -16
1311 			 *     2. r6 = bpf_get_prandom_u32()
1312 			 *     3. while (bpf_iter_num_next(&fp[-8])) {
1313 			 *     4.   if (r6 != 42) {
1314 			 *     5.     r7 = -32
1315 			 *     6.     r6 = bpf_get_prandom_u32()
1316 			 *     7.     continue
1317 			 *     8.   }
1318 			 *     9.   r0 = r10
1319 			 *    10.   r0 += r7
1320 			 *    11.   r8 = *(u64 *)(r0 + 0)
1321 			 *    12.   r6 = bpf_get_prandom_u32()
1322 			 *    13. }
1323 			 *
1324 			 * Here verifier would first visit path 1-3, create a checkpoint at 3
1325 			 * with r7=-16, continue to 4-7,3. Existing checkpoint at 3 does
1326 			 * not have read or precision mark for r7 yet, thus inexact states
1327 			 * comparison would discard current state with r7=-32
1328 			 * => unsafe memory access at 11 would not be caught.
1329 			 */
1330 			if (is_iter_next_insn(env, insn_idx)) {
1331 				if (states_equal(env, &sl->state, cur, RANGE_WITHIN)) {
1332 					struct bpf_func_state *cur_frame;
1333 					struct bpf_reg_state *iter_state, *iter_reg;
1334 					int spi;
1335 
1336 					cur_frame = cur->frame[cur->curframe];
1337 					/* btf_check_iter_kfuncs() enforces that
1338 					 * iter state pointer is always the first arg
1339 					 */
1340 					iter_reg = &cur_frame->regs[BPF_REG_1];
1341 					/* current state is valid due to states_equal(),
1342 					 * so we can assume valid iter and reg state,
1343 					 * no need for extra (re-)validations
1344 					 */
1345 					spi = bpf_get_spi(iter_reg->var_off.value);
1346 					iter_state = &bpf_func(env, iter_reg)->stack[spi].spilled_ptr;
1347 					if (iter_state->iter.state == BPF_ITER_STATE_ACTIVE) {
1348 						loop = true;
1349 						goto hit;
1350 					}
1351 				}
1352 				goto skip_inf_loop_check;
1353 			}
1354 			if (is_may_goto_insn_at(env, insn_idx)) {
1355 				if (sl->state.may_goto_depth != cur->may_goto_depth &&
1356 				    states_equal(env, &sl->state, cur, RANGE_WITHIN)) {
1357 					loop = true;
1358 					goto hit;
1359 				}
1360 			}
1361 			if (bpf_calls_callback(env, insn_idx)) {
1362 				if (states_equal(env, &sl->state, cur, RANGE_WITHIN)) {
1363 					loop = true;
1364 					goto hit;
1365 				}
1366 				goto skip_inf_loop_check;
1367 			}
1368 			/* attempt to detect infinite loop to avoid unnecessary doomed work */
1369 			if (states_maybe_looping(&sl->state, cur) &&
1370 			    states_equal(env, &sl->state, cur, EXACT) &&
1371 			    !iter_active_depths_differ(&sl->state, cur) &&
1372 			    sl->state.may_goto_depth == cur->may_goto_depth &&
1373 			    sl->state.callback_unroll_depth == cur->callback_unroll_depth) {
1374 				verbose_linfo(env, insn_idx, "; ");
1375 				verbose(env, "infinite loop detected at insn %d\n", insn_idx);
1376 				verbose(env, "cur state:");
1377 				print_verifier_state(env, cur, cur->curframe, true);
1378 				verbose(env, "old state:");
1379 				print_verifier_state(env, &sl->state, cur->curframe, true);
1380 				return -EINVAL;
1381 			}
1382 			/* if the verifier is processing a loop, avoid adding new state
1383 			 * too often, since different loop iterations have distinct
1384 			 * states and may not help future pruning.
1385 			 * This threshold shouldn't be too low to make sure that
1386 			 * a loop with large bound will be rejected quickly.
1387 			 * The most abusive loop will be:
1388 			 * r1 += 1
1389 			 * if r1 < 1000000 goto pc-2
1390 			 * 1M insn_procssed limit / 100 == 10k peak states.
1391 			 * This threshold shouldn't be too high either, since states
1392 			 * at the end of the loop are likely to be useful in pruning.
1393 			 */
1394 skip_inf_loop_check:
1395 			if (!force_new_state &&
1396 			    env->jmps_processed - env->prev_jmps_processed < 20 &&
1397 			    env->insn_processed - env->prev_insn_processed < 100)
1398 				add_new_state = false;
1399 			goto miss;
1400 		}
1401 		/* See comments for mark_all_regs_read_and_precise() */
1402 		loop = incomplete_read_marks(env, &sl->state);
1403 		if (states_equal(env, &sl->state, cur, loop ? RANGE_WITHIN : NOT_EXACT)) {
1404 hit:
1405 			sl->hit_cnt++;
1406 
1407 			/* if previous state reached the exit with precision and
1408 			 * current state is equivalent to it (except precision marks)
1409 			 * the precision needs to be propagated back in
1410 			 * the current state.
1411 			 */
1412 			err = 0;
1413 			if (bpf_is_jmp_point(env, env->insn_idx))
1414 				err = bpf_push_jmp_history(env, cur, 0, 0, 0, 0);
1415 			err = err ? : propagate_precision(env, &sl->state, cur, NULL);
1416 			if (err)
1417 				return err;
1418 			/* When processing iterator based loops above propagate_liveness and
1419 			 * propagate_precision calls are not sufficient to transfer all relevant
1420 			 * read and precision marks. E.g. consider the following case:
1421 			 *
1422 			 *  .-> A --.  Assume the states are visited in the order A, B, C.
1423 			 *  |   |   |  Assume that state B reaches a state equivalent to state A.
1424 			 *  |   v   v  At this point, state C is not processed yet, so state A
1425 			 *  '-- B   C  has not received any read or precision marks from C.
1426 			 *             Thus, marks propagated from A to B are incomplete.
1427 			 *
1428 			 * The verifier mitigates this by performing the following steps:
1429 			 *
1430 			 * - Prior to the main verification pass, strongly connected components
1431 			 *   (SCCs) are computed over the program's control flow graph,
1432 			 *   intraprocedurally.
1433 			 *
1434 			 * - During the main verification pass, `maybe_enter_scc()` checks
1435 			 *   whether the current verifier state is entering an SCC. If so, an
1436 			 *   instance of a `bpf_scc_visit` object is created, and the state
1437 			 *   entering the SCC is recorded as the entry state.
1438 			 *
1439 			 * - This instance is associated not with the SCC itself, but with a
1440 			 *   `bpf_scc_callchain`: a tuple consisting of the call sites leading to
1441 			 *   the SCC and the SCC id. See `compute_scc_callchain()`.
1442 			 *
1443 			 * - When a verification path encounters a `states_equal(...,
1444 			 *   RANGE_WITHIN)` condition, there exists a call chain describing the
1445 			 *   current state and a corresponding `bpf_scc_visit` instance. A copy
1446 			 *   of the current state is created and added to
1447 			 *   `bpf_scc_visit->backedges`.
1448 			 *
1449 			 * - When a verification path terminates, `maybe_exit_scc()` is called
1450 			 *   from `bpf_update_branch_counts()`. For states with `branches == 0`, it
1451 			 *   checks whether the state is the entry state of any `bpf_scc_visit`
1452 			 *   instance. If it is, this indicates that all paths originating from
1453 			 *   this SCC visit have been explored. `propagate_backedges()` is then
1454 			 *   called, which propagates read and precision marks through the
1455 			 *   backedges until a fixed point is reached.
1456 			 *   (In the earlier example, this would propagate marks from A to B,
1457 			 *    from C to A, and then again from A to B.)
1458 			 *
1459 			 * A note on callchains
1460 			 * --------------------
1461 			 *
1462 			 * Consider the following example:
1463 			 *
1464 			 *     void foo() { loop { ... SCC#1 ... } }
1465 			 *     void main() {
1466 			 *       A: foo();
1467 			 *       B: ...
1468 			 *       C: foo();
1469 			 *     }
1470 			 *
1471 			 * Here, there are two distinct callchains leading to SCC#1:
1472 			 * - (A, SCC#1)
1473 			 * - (C, SCC#1)
1474 			 *
1475 			 * Each callchain identifies a separate `bpf_scc_visit` instance that
1476 			 * accumulates backedge states. The `propagate_{liveness,precision}()`
1477 			 * functions traverse the parent state of each backedge state, which
1478 			 * means these parent states must remain valid (i.e., not freed) while
1479 			 * the corresponding `bpf_scc_visit` instance exists.
1480 			 *
1481 			 * Associating `bpf_scc_visit` instances directly with SCCs instead of
1482 			 * callchains would break this invariant:
1483 			 * - States explored during `C: foo()` would contribute backedges to
1484 			 *   SCC#1, but SCC#1 would only be exited once the exploration of
1485 			 *   `A: foo()` completes.
1486 			 * - By that time, the states explored between `A: foo()` and `C: foo()`
1487 			 *   (i.e., `B: ...`) may have already been freed, causing the parent
1488 			 *   links for states from `C: foo()` to become invalid.
1489 			 */
1490 			if (loop) {
1491 				struct bpf_scc_backedge *backedge;
1492 
1493 				backedge = kzalloc_obj(*backedge,
1494 						       GFP_KERNEL_ACCOUNT);
1495 				if (!backedge)
1496 					return -ENOMEM;
1497 				err = bpf_copy_verifier_state(&backedge->state, cur);
1498 				backedge->state.equal_state = &sl->state;
1499 				backedge->state.insn_idx = insn_idx;
1500 				err = err ?: add_scc_backedge(env, &sl->state, backedge);
1501 				if (err) {
1502 					bpf_free_verifier_state(&backedge->state, false);
1503 					kfree(backedge);
1504 					return err;
1505 				}
1506 			}
1507 			return 1;
1508 		}
1509 miss:
1510 		/* when new state is not going to be added do not increase miss count.
1511 		 * Otherwise several loop iterations will remove the state
1512 		 * recorded earlier. The goal of these heuristics is to have
1513 		 * states from some iterations of the loop (some in the beginning
1514 		 * and some at the end) to help pruning.
1515 		 */
1516 		if (add_new_state)
1517 			sl->miss_cnt++;
1518 		/* heuristic to determine whether this state is beneficial
1519 		 * to keep checking from state equivalence point of view.
1520 		 * Higher numbers increase max_states_per_insn and verification time,
1521 		 * but do not meaningfully decrease insn_processed.
1522 		 * 'n' controls how many times state could miss before eviction.
1523 		 * Use bigger 'n' for checkpoints because evicting checkpoint states
1524 		 * too early would hinder iterator convergence.
1525 		 */
1526 		n = bpf_is_force_checkpoint(env, insn_idx) && sl->state.branches > 0 ? 64 : 3;
1527 		if (sl->miss_cnt > sl->hit_cnt * n + n) {
1528 			/* the state is unlikely to be useful. Remove it to
1529 			 * speed up verification
1530 			 */
1531 			sl->in_free_list = true;
1532 			list_del(&sl->node);
1533 			list_add(&sl->node, &env->free_list);
1534 			env->free_list_size++;
1535 			env->explored_states_size--;
1536 			maybe_free_verifier_state(env, sl);
1537 		}
1538 	}
1539 
1540 	if (env->max_states_per_insn < states_cnt)
1541 		env->max_states_per_insn = states_cnt;
1542 
1543 	if (!env->bpf_capable && states_cnt > BPF_COMPLEXITY_LIMIT_STATES)
1544 		return 0;
1545 
1546 	if (!add_new_state)
1547 		return 0;
1548 
1549 	/* There were no equivalent states, remember the current one.
1550 	 * Technically the current state is not proven to be safe yet,
1551 	 * but it will either reach outer most bpf_exit (which means it's safe)
1552 	 * or it will be rejected. When there are no loops the verifier won't be
1553 	 * seeing this tuple (frame[0].callsite, frame[1].callsite, .. insn_idx)
1554 	 * again on the way to bpf_exit.
1555 	 * When looping the sl->state.branches will be > 0 and this state
1556 	 * will not be considered for equivalence until branches == 0.
1557 	 */
1558 	new_sl = kzalloc_obj(struct bpf_verifier_state_list, GFP_KERNEL_ACCOUNT);
1559 	if (!new_sl)
1560 		return -ENOMEM;
1561 	env->total_states++;
1562 	env->explored_states_size++;
1563 	update_peak_states(env);
1564 	env->prev_jmps_processed = env->jmps_processed;
1565 	env->prev_insn_processed = env->insn_processed;
1566 
1567 	/* forget precise markings we inherited, see __mark_chain_precision */
1568 	if (env->bpf_capable)
1569 		mark_all_scalars_imprecise(env, cur);
1570 
1571 	bpf_clear_singular_ids(env, cur);
1572 
1573 	/* add new state to the head of linked list */
1574 	new = &new_sl->state;
1575 	err = bpf_copy_verifier_state(new, cur);
1576 	if (err) {
1577 		bpf_free_verifier_state(new, false);
1578 		kfree(new_sl);
1579 		return err;
1580 	}
1581 	new->insn_idx = insn_idx;
1582 	verifier_bug_if(new->branches != 1, env,
1583 			"%s:branches_to_explore=%d insn %d",
1584 			__func__, new->branches, insn_idx);
1585 	err = maybe_enter_scc(env, new);
1586 	if (err) {
1587 		bpf_free_verifier_state(new, false);
1588 		kfree(new_sl);
1589 		return err;
1590 	}
1591 
1592 	cur->parent = new;
1593 	cur->first_insn_idx = insn_idx;
1594 	cur->dfs_depth = new->dfs_depth + 1;
1595 	bpf_clear_jmp_history(cur);
1596 	list_add(&new_sl->node, head);
1597 	return 0;
1598 }
1599