xref: /linux/kernel/bpf/backtrack.c (revision 056e065a6b6e01ab54bb9770c0d5a15350e571e2)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */
3 #include <linux/bpf.h>
4 #include <linux/bpf_verifier.h>
5 #include <linux/filter.h>
6 #include <linux/bitmap.h>
7 
8 #define verbose(env, fmt, args...) bpf_verifier_log_write(env, fmt, ##args)
9 
10 /* for any branch, call, exit record the history of jmps in the given state */
11 int bpf_push_jmp_history(struct bpf_verifier_env *env, struct bpf_verifier_state *cur,
12 			 int insn_flags, int spi, int frame, u64 linked_regs)
13 {
14 	u32 cnt = cur->jmp_history_cnt;
15 	struct bpf_jmp_history_entry *p;
16 	size_t alloc_size;
17 
18 	/* combine instruction flags if we already recorded this instruction */
19 	if (env->cur_hist_ent) {
20 		/* atomic instructions push insn_flags twice, for READ and
21 		 * WRITE sides, but they should agree on stack slot
22 		 */
23 		verifier_bug_if((env->cur_hist_ent->flags & insn_flags) &&
24 				(env->cur_hist_ent->flags & insn_flags) != insn_flags,
25 				env, "insn history: insn_idx %d cur flags %x new flags %x",
26 				env->insn_idx, env->cur_hist_ent->flags, insn_flags);
27 		env->cur_hist_ent->flags |= insn_flags;
28 		env->cur_hist_ent->spi = spi;
29 		env->cur_hist_ent->frame = frame;
30 		verifier_bug_if(env->cur_hist_ent->linked_regs != 0, env,
31 				"insn history: insn_idx %d linked_regs: %#llx",
32 				env->insn_idx, env->cur_hist_ent->linked_regs);
33 		env->cur_hist_ent->linked_regs = linked_regs;
34 		return 0;
35 	}
36 
37 	cnt++;
38 	alloc_size = kmalloc_size_roundup(size_mul(cnt, sizeof(*p)));
39 	p = krealloc(cur->jmp_history, alloc_size, GFP_KERNEL_ACCOUNT);
40 	if (!p)
41 		return -ENOMEM;
42 	cur->jmp_history = p;
43 
44 	p = &cur->jmp_history[cnt - 1];
45 	p->idx = env->insn_idx;
46 	p->prev_idx = env->prev_insn_idx;
47 	p->flags = insn_flags;
48 	p->spi = spi;
49 	p->frame = frame;
50 	p->linked_regs = linked_regs;
51 	cur->jmp_history_cnt = cnt;
52 	env->cur_hist_ent = p;
53 
54 	return 0;
55 }
56 
57 static bool is_atomic_load_insn(const struct bpf_insn *insn)
58 {
59 	return BPF_CLASS(insn->code) == BPF_STX &&
60 	       BPF_MODE(insn->code) == BPF_ATOMIC &&
61 	       insn->imm == BPF_LOAD_ACQ;
62 }
63 
64 static bool is_atomic_fetch_insn(const struct bpf_insn *insn)
65 {
66 	return BPF_CLASS(insn->code) == BPF_STX &&
67 	       BPF_MODE(insn->code) == BPF_ATOMIC &&
68 	       (insn->imm & BPF_FETCH);
69 }
70 
71 /* Backtrack one insn at a time. If idx is not at the top of recorded
72  * history then previous instruction came from straight line execution.
73  * Return -ENOENT if we exhausted all instructions within given state.
74  *
75  * It's legal to have a bit of a looping with the same starting and ending
76  * insn index within the same state, e.g.: 3->4->5->3, so just because current
77  * instruction index is the same as state's first_idx doesn't mean we are
78  * done. If there is still some jump history left, we should keep going. We
79  * need to take into account that we might have a jump history between given
80  * state's parent and itself, due to checkpointing. In this case, we'll have
81  * history entry recording a jump from last instruction of parent state and
82  * first instruction of given state.
83  */
84 static int get_prev_insn_idx(struct bpf_verifier_state *st, int i,
85 			     u32 *history)
86 {
87 	u32 cnt = *history;
88 
89 	if (i == st->first_insn_idx) {
90 		if (cnt == 0)
91 			return -ENOENT;
92 		if (cnt == 1 && st->jmp_history[0].idx == i)
93 			return -ENOENT;
94 	}
95 
96 	if (cnt && st->jmp_history[cnt - 1].idx == i) {
97 		i = st->jmp_history[cnt - 1].prev_idx;
98 		(*history)--;
99 	} else {
100 		i--;
101 	}
102 	return i;
103 }
104 
105 static struct bpf_jmp_history_entry *get_jmp_hist_entry(struct bpf_verifier_state *st,
106 						        u32 hist_end, int insn_idx)
107 {
108 	if (hist_end > 0 && st->jmp_history[hist_end - 1].idx == insn_idx)
109 		return &st->jmp_history[hist_end - 1];
110 	return NULL;
111 }
112 
113 static inline void bt_init(struct backtrack_state *bt, u32 frame)
114 {
115 	bt->frame = frame;
116 }
117 
118 static inline void bt_reset(struct backtrack_state *bt)
119 {
120 	struct bpf_verifier_env *env = bt->env;
121 
122 	memset(bt, 0, sizeof(*bt));
123 	bt->env = env;
124 }
125 
126 static inline u32 bt_empty(struct backtrack_state *bt)
127 {
128 	u64 mask = 0;
129 	int i;
130 
131 	for (i = 0; i <= bt->frame; i++)
132 		mask |= bt->reg_masks[i] | bt->stack_masks[i] | bt->stack_arg_masks[i];
133 
134 	return mask == 0;
135 }
136 
137 static inline void bt_clear_frame_stack_arg_slot(struct backtrack_state *bt, u32 frame, u32 slot)
138 {
139 	bt->stack_arg_masks[frame] &= ~(1 << slot);
140 }
141 
142 static inline bool bt_is_frame_stack_arg_slot_set(struct backtrack_state *bt, u32 frame, u32 slot)
143 {
144 	return bt->stack_arg_masks[frame] & (1 << slot);
145 }
146 
147 static inline int bt_subprog_enter(struct backtrack_state *bt)
148 {
149 	if (bt->frame == MAX_CALL_FRAMES - 1) {
150 		verifier_bug(bt->env, "subprog enter from frame %d", bt->frame);
151 		return -EFAULT;
152 	}
153 	bt->frame++;
154 	return 0;
155 }
156 
157 static inline int bt_subprog_exit(struct backtrack_state *bt)
158 {
159 	if (bt->frame == 0) {
160 		verifier_bug(bt->env, "subprog exit from frame 0");
161 		return -EFAULT;
162 	}
163 	bt->frame--;
164 	return 0;
165 }
166 
167 static inline void bt_clear_frame_reg(struct backtrack_state *bt, u32 frame, u32 reg)
168 {
169 	bt->reg_masks[frame] &= ~(1 << reg);
170 }
171 
172 static inline void bt_set_reg(struct backtrack_state *bt, u32 reg)
173 {
174 	bpf_bt_set_frame_reg(bt, bt->frame, reg);
175 }
176 
177 static inline void bt_clear_reg(struct backtrack_state *bt, u32 reg)
178 {
179 	bt_clear_frame_reg(bt, bt->frame, reg);
180 }
181 
182 static inline void bt_clear_frame_slot(struct backtrack_state *bt, u32 frame, u32 slot)
183 {
184 	bt->stack_masks[frame] &= ~(1ull << slot);
185 }
186 
187 static inline u32 bt_frame_reg_mask(struct backtrack_state *bt, u32 frame)
188 {
189 	return bt->reg_masks[frame];
190 }
191 
192 static inline u32 bt_reg_mask(struct backtrack_state *bt)
193 {
194 	return bt->reg_masks[bt->frame];
195 }
196 
197 static inline u64 bt_frame_stack_mask(struct backtrack_state *bt, u32 frame)
198 {
199 	return bt->stack_masks[frame];
200 }
201 
202 static inline u64 bt_stack_mask(struct backtrack_state *bt)
203 {
204 	return bt->stack_masks[bt->frame];
205 }
206 
207 static inline u8 bt_stack_arg_mask(struct backtrack_state *bt)
208 {
209 	return bt->stack_arg_masks[bt->frame];
210 }
211 
212 static inline bool bt_is_reg_set(struct backtrack_state *bt, u32 reg)
213 {
214 	return bt->reg_masks[bt->frame] & (1 << reg);
215 }
216 
217 
218 /* format registers bitmask, e.g., "r0,r2,r4" for 0x15 mask */
219 static void fmt_reg_mask(char *buf, ssize_t buf_sz, u32 reg_mask)
220 {
221 	DECLARE_BITMAP(mask, 64);
222 	bool first = true;
223 	int i, n;
224 
225 	buf[0] = '\0';
226 
227 	bitmap_from_u64(mask, reg_mask);
228 	for_each_set_bit(i, mask, 32) {
229 		n = snprintf(buf, buf_sz, "%sr%d", first ? "" : ",", i);
230 		first = false;
231 		buf += n;
232 		buf_sz -= n;
233 		if (buf_sz < 0)
234 			break;
235 	}
236 }
237 /* format stack slots bitmask, e.g., "-8,-24,-40" for 0x15 mask */
238 void bpf_fmt_stack_mask(char *buf, ssize_t buf_sz, u64 stack_mask)
239 {
240 	DECLARE_BITMAP(mask, 64);
241 	bool first = true;
242 	int i, n;
243 
244 	buf[0] = '\0';
245 
246 	bitmap_from_u64(mask, stack_mask);
247 	for_each_set_bit(i, mask, 64) {
248 		n = snprintf(buf, buf_sz, "%s%d", first ? "" : ",", -(i + 1) * 8);
249 		first = false;
250 		buf += n;
251 		buf_sz -= n;
252 		if (buf_sz < 0)
253 			break;
254 	}
255 }
256 
257 
258 /* For given verifier state backtrack_insn() is called from the last insn to
259  * the first insn. Its purpose is to compute a bitmask of registers and
260  * stack slots that needs precision in the parent verifier state.
261  *
262  * @idx is an index of the instruction we are currently processing;
263  * @subseq_idx is an index of the subsequent instruction that:
264  *   - *would be* executed next, if jump history is viewed in forward order;
265  *   - *was* processed previously during backtracking.
266  */
267 static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx,
268 			  struct bpf_jmp_history_entry *hist, struct backtrack_state *bt)
269 {
270 	struct bpf_insn *insn = env->prog->insnsi + idx;
271 	u8 class = BPF_CLASS(insn->code);
272 	u8 opcode = BPF_OP(insn->code);
273 	u8 mode = BPF_MODE(insn->code);
274 	u32 dreg = insn->dst_reg;
275 	u32 sreg = insn->src_reg;
276 	u32 spi, i, fr;
277 
278 	if (insn->code == 0)
279 		return 0;
280 	if (env->log.level & BPF_LOG_LEVEL2) {
281 		fmt_reg_mask(env->tmp_str_buf, TMP_STR_BUF_LEN, bt_reg_mask(bt));
282 		verbose(env, "mark_precise: frame%d: regs=%s ",
283 			bt->frame, env->tmp_str_buf);
284 		bpf_fmt_stack_mask(env->tmp_str_buf, TMP_STR_BUF_LEN, bt_stack_mask(bt));
285 		verbose(env, "stack=%s before ", env->tmp_str_buf);
286 		verbose(env, "%d: ", idx);
287 		bpf_verbose_insn(env, insn);
288 	}
289 
290 	/* If there is a history record that some registers gained range at this insn,
291 	 * propagate precision marks to those registers, so that bt_is_reg_set()
292 	 * accounts for these registers.
293 	 */
294 	bpf_bt_sync_linked_regs(bt, hist);
295 
296 	if (class == BPF_ALU || class == BPF_ALU64) {
297 		if (!bt_is_reg_set(bt, dreg))
298 			return 0;
299 		if (opcode == BPF_END || opcode == BPF_NEG) {
300 			/* sreg is reserved and unused
301 			 * dreg still need precision before this insn
302 			 */
303 			return 0;
304 		} else if (opcode == BPF_MOV) {
305 			if (BPF_SRC(insn->code) == BPF_X) {
306 				/* dreg = sreg or dreg = (s8, s16, s32)sreg
307 				 * dreg needs precision after this insn
308 				 * sreg needs precision before this insn
309 				 */
310 				bt_clear_reg(bt, dreg);
311 				if (sreg != BPF_REG_FP)
312 					bt_set_reg(bt, sreg);
313 			} else {
314 				/* dreg = K
315 				 * dreg needs precision after this insn.
316 				 * Corresponding register is already marked
317 				 * as precise=true in this verifier state.
318 				 * No further markings in parent are necessary
319 				 */
320 				bt_clear_reg(bt, dreg);
321 			}
322 		} else {
323 			if (BPF_SRC(insn->code) == BPF_X) {
324 				/* dreg += sreg
325 				 * both dreg and sreg need precision
326 				 * before this insn
327 				 */
328 				if (sreg != BPF_REG_FP)
329 					bt_set_reg(bt, sreg);
330 			} /* else dreg += K
331 			   * dreg still needs precision before this insn
332 			   */
333 		}
334 	} else if (class == BPF_LDX ||
335 		   is_atomic_load_insn(insn) ||
336 		   is_atomic_fetch_insn(insn)) {
337 		u32 load_reg = dreg;
338 
339 		/*
340 		 * Atomic fetch operation writes the old value into
341 		 * a register (sreg or r0) and if it was tracked for
342 		 * precision, propagate to the stack slot like we do
343 		 * in regular ldx.
344 		 */
345 		if (is_atomic_fetch_insn(insn))
346 			load_reg = insn->imm == BPF_CMPXCHG ?
347 				   BPF_REG_0 : sreg;
348 
349 		if (!bt_is_reg_set(bt, load_reg))
350 			return 0;
351 		bt_clear_reg(bt, load_reg);
352 
353 		if (hist && hist->flags & INSN_F_STACK_ARG_ACCESS) {
354 			spi = hist->spi;
355 			/*
356 			 * Stack arg read: callee reads from r11+off, but
357 			 * the data lives in the caller's stack_arg_regs.
358 			 * Set the mask in the caller frame so precision
359 			 * is marked in the caller's slot at the callee
360 			 * entry checkpoint.
361 			 */
362 			bt_set_frame_stack_arg_slot(bt, bt->frame - 1, spi);
363 			return 0;
364 		}
365 
366 		/* scalars can only be spilled into stack w/o losing precision.
367 		 * Load from any other memory can be zero extended.
368 		 * The desire to keep that precision is already indicated
369 		 * by 'precise' mark in corresponding register of this state.
370 		 * No further tracking necessary.
371 		 */
372 		if (!hist || !(hist->flags & INSN_F_STACK_ACCESS))
373 			return 0;
374 		/* dreg = *(u64 *)[fp - off] was a fill from the stack.
375 		 * that [fp - off] slot contains scalar that needs to be
376 		 * tracked with precision
377 		 */
378 		spi = hist->spi;
379 		fr = hist->frame;
380 		bpf_bt_set_frame_slot(bt, fr, spi);
381 	} else if (class == BPF_STX || class == BPF_ST) {
382 		if (bt_is_reg_set(bt, dreg))
383 			/* stx & st shouldn't be using _scalar_ dst_reg
384 			 * to access memory. It means backtracking
385 			 * encountered a case of pointer subtraction.
386 			 */
387 			return -ENOTSUPP;
388 
389 		if (hist && hist->flags & INSN_F_STACK_ARG_ACCESS) {
390 			spi = hist->spi;
391 			if (!bt_is_frame_stack_arg_slot_set(bt, bt->frame, spi))
392 				return 0;
393 			bt_clear_frame_stack_arg_slot(bt, bt->frame, spi);
394 			if (class == BPF_STX)
395 				bt_set_reg(bt, sreg);
396 			return 0;
397 		}
398 
399 		/* scalars can only be spilled into stack */
400 		if (!hist || !(hist->flags & INSN_F_STACK_ACCESS))
401 			return 0;
402 		spi = hist->spi;
403 		fr = hist->frame;
404 		if (!bt_is_frame_slot_set(bt, fr, spi))
405 			return 0;
406 		bt_clear_frame_slot(bt, fr, spi);
407 		if (class == BPF_STX)
408 			bt_set_reg(bt, sreg);
409 	} else if (class == BPF_JMP || class == BPF_JMP32) {
410 		if (bpf_pseudo_call(insn)) {
411 			int subprog_insn_idx, subprog;
412 
413 			subprog_insn_idx = idx + insn->imm + 1;
414 			subprog = bpf_find_subprog(env, subprog_insn_idx);
415 			if (subprog < 0)
416 				return -EFAULT;
417 
418 			if (bpf_subprog_is_global(env, subprog)) {
419 				/* check that jump history doesn't have any
420 				 * extra instructions from subprog; the next
421 				 * instruction after call to global subprog
422 				 * should be literally next instruction in
423 				 * caller program
424 				 */
425 				verifier_bug_if(idx + 1 != subseq_idx, env,
426 						"extra insn from subprog");
427 				/* r1-r5 are invalidated after subprog call,
428 				 * so for global func call it shouldn't be set
429 				 * anymore
430 				 */
431 				if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) {
432 					verifier_bug(env, "global subprog unexpected regs %x",
433 						     bt_reg_mask(bt));
434 					return -EFAULT;
435 				}
436 				/* global subprog always sets R0 */
437 				bt_clear_reg(bt, BPF_REG_0);
438 				return 0;
439 			} else {
440 				/* static subprog call instruction, which
441 				 * means that we are exiting current subprog,
442 				 * so only r1-r5 could be still requested as
443 				 * precise, r0 and r6-r10 or any stack slot in
444 				 * the current frame should be zero by now
445 				 */
446 				if (bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) {
447 					verifier_bug(env, "static subprog unexpected regs %x",
448 						     bt_reg_mask(bt));
449 					return -EFAULT;
450 				}
451 				/* we are now tracking register spills correctly,
452 				 * so any instance of leftover slots is a bug
453 				 */
454 				if (bt_stack_mask(bt) != 0) {
455 					verifier_bug(env,
456 						     "static subprog leftover stack slots %llx",
457 						     bt_stack_mask(bt));
458 					return -EFAULT;
459 				}
460 				/* propagate r1-r5 to the caller */
461 				for (i = BPF_REG_1; i <= BPF_REG_5; i++) {
462 					if (bt_is_reg_set(bt, i)) {
463 						bt_clear_reg(bt, i);
464 						bpf_bt_set_frame_reg(bt, bt->frame - 1, i);
465 					}
466 				}
467 				if (bt_stack_arg_mask(bt)) {
468 					verifier_bug(env,
469 						     "static subprog leftover stack arg slots %x",
470 						     bt_stack_arg_mask(bt));
471 					return -EFAULT;
472 				}
473 				if (bt_subprog_exit(bt))
474 					return -EFAULT;
475 				return 0;
476 			}
477 		} else if (bpf_is_sync_callback_calling_insn(insn) && idx != subseq_idx - 1) {
478 			/* exit from callback subprog to callback-calling helper or
479 			 * kfunc call. Use idx/subseq_idx check to discern it from
480 			 * straight line code backtracking.
481 			 * Unlike the subprog call handling above, we shouldn't
482 			 * propagate precision of r1-r5 (if any requested), as they are
483 			 * not actually arguments passed directly to callback subprogs
484 			 */
485 			if (bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) {
486 				verifier_bug(env, "callback unexpected regs %x",
487 					     bt_reg_mask(bt));
488 				return -EFAULT;
489 			}
490 			if (bt_stack_mask(bt) != 0) {
491 				verifier_bug(env, "callback leftover stack slots %llx",
492 					     bt_stack_mask(bt));
493 				return -EFAULT;
494 			}
495 			/* clear r1-r5 in callback subprog's mask */
496 			for (i = BPF_REG_1; i <= BPF_REG_5; i++)
497 				bt_clear_reg(bt, i);
498 			if (bt_subprog_exit(bt))
499 				return -EFAULT;
500 			return 0;
501 		} else if (opcode == BPF_CALL) {
502 			/* kfunc with imm==0 is invalid and fixup_kfunc_call will
503 			 * catch this error later. Make backtracking conservative
504 			 * with ENOTSUPP.
505 			 */
506 			if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL && insn->imm == 0)
507 				return -ENOTSUPP;
508 			/* regular helper call sets R0 */
509 			bt_clear_reg(bt, BPF_REG_0);
510 			if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) {
511 				/* if backtracking was looking for registers R1-R5
512 				 * they should have been found already.
513 				 */
514 				verifier_bug(env, "backtracking call unexpected regs %x",
515 					     bt_reg_mask(bt));
516 				return -EFAULT;
517 			}
518 			if (insn->src_reg == BPF_REG_0 && insn->imm == BPF_FUNC_tail_call
519 			    && subseq_idx - idx != 1) {
520 				if (bt_subprog_enter(bt))
521 					return -EFAULT;
522 			}
523 		} else if (opcode == BPF_EXIT) {
524 			bool r0_precise;
525 
526 			/* Backtracking to a nested function call, 'idx' is a part of
527 			 * the inner frame 'subseq_idx' is a part of the outer frame.
528 			 * In case of a regular function call, instructions giving
529 			 * precision to registers R1-R5 should have been found already.
530 			 * In case of a callback, it is ok to have R1-R5 marked for
531 			 * backtracking, as these registers are set by the function
532 			 * invoking callback.
533 			 */
534 			if (subseq_idx >= 0 && bpf_calls_callback(env, subseq_idx))
535 				for (i = BPF_REG_1; i <= BPF_REG_5; i++)
536 					bt_clear_reg(bt, i);
537 			if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) {
538 				verifier_bug(env, "backtracking exit unexpected regs %x",
539 					     bt_reg_mask(bt));
540 				return -EFAULT;
541 			}
542 
543 			/* BPF_EXIT in subprog or callback always returns
544 			 * right after the call instruction, so by checking
545 			 * whether the instruction at subseq_idx-1 is subprog
546 			 * call or not we can distinguish actual exit from
547 			 * *subprog* from exit from *callback*. In the former
548 			 * case, we need to propagate r0 precision, if
549 			 * necessary. In the former we never do that.
550 			 */
551 			r0_precise = subseq_idx - 1 >= 0 &&
552 				     bpf_pseudo_call(&env->prog->insnsi[subseq_idx - 1]) &&
553 				     bt_is_reg_set(bt, BPF_REG_0);
554 
555 			bt_clear_reg(bt, BPF_REG_0);
556 			if (bt_subprog_enter(bt))
557 				return -EFAULT;
558 
559 			if (r0_precise)
560 				bt_set_reg(bt, BPF_REG_0);
561 			/* r6-r9 and stack slots will stay set in caller frame
562 			 * bitmasks until we return back from callee(s)
563 			 */
564 			return 0;
565 		} else if (BPF_SRC(insn->code) == BPF_X) {
566 			if (!bt_is_reg_set(bt, dreg) && !bt_is_reg_set(bt, sreg))
567 				return 0;
568 			/* dreg <cond> sreg
569 			 * Both dreg and sreg need precision before
570 			 * this insn. If only sreg was marked precise
571 			 * before it would be equally necessary to
572 			 * propagate it to dreg.
573 			 */
574 			if (!hist || !(hist->flags & INSN_F_SRC_REG_STACK))
575 				bt_set_reg(bt, sreg);
576 			if (!hist || !(hist->flags & INSN_F_DST_REG_STACK))
577 				bt_set_reg(bt, dreg);
578 		} else if (BPF_SRC(insn->code) == BPF_K) {
579 			 /* dreg <cond> K
580 			  * Only dreg still needs precision before
581 			  * this insn, so for the K-based conditional
582 			  * there is nothing new to be marked.
583 			  */
584 		}
585 	} else if (class == BPF_LD) {
586 		if (!bt_is_reg_set(bt, dreg))
587 			return 0;
588 		bt_clear_reg(bt, dreg);
589 		/* It's ld_imm64 or ld_abs or ld_ind.
590 		 * For ld_imm64 no further tracking of precision
591 		 * into parent is necessary
592 		 */
593 		if (mode == BPF_IND || mode == BPF_ABS)
594 			/* to be analyzed */
595 			return -ENOTSUPP;
596 	}
597 	/* Propagate precision marks to linked registers, to account for
598 	 * registers marked as precise in this function.
599 	 */
600 	bpf_bt_sync_linked_regs(bt, hist);
601 	return 0;
602 }
603 
604 /* the scalar precision tracking algorithm:
605  * . at the start all registers have precise=false.
606  * . scalar ranges are tracked as normal through alu and jmp insns.
607  * . once precise value of the scalar register is used in:
608  *   .  ptr + scalar alu
609  *   . if (scalar cond K|scalar)
610  *   .  helper_call(.., scalar, ...) where ARG_CONST is expected
611  *   backtrack through the verifier states and mark all registers and
612  *   stack slots with spilled constants that these scalar registers
613  *   should be precise.
614  * . during state pruning two registers (or spilled stack slots)
615  *   are equivalent if both are not precise.
616  *
617  * Note the verifier cannot simply walk register parentage chain,
618  * since many different registers and stack slots could have been
619  * used to compute single precise scalar.
620  *
621  * The approach of starting with precise=true for all registers and then
622  * backtrack to mark a register as not precise when the verifier detects
623  * that program doesn't care about specific value (e.g., when helper
624  * takes register as ARG_ANYTHING parameter) is not safe.
625  *
626  * It's ok to walk single parentage chain of the verifier states.
627  * It's possible that this backtracking will go all the way till 1st insn.
628  * All other branches will be explored for needing precision later.
629  *
630  * The backtracking needs to deal with cases like:
631  *   R8=map_value(id=0,off=0,ks=4,vs=1952,imm=0) R9_w=map_value(id=0,off=40,ks=4,vs=1952,imm=0)
632  * r9 -= r8
633  * r5 = r9
634  * if r5 > 0x79f goto pc+7
635  *    R5_w=inv(id=0,umax_value=1951,var_off=(0x0; 0x7ff))
636  * r5 += 1
637  * ...
638  * call bpf_perf_event_output#25
639  *   where .arg5_type = ARG_CONST_SIZE_OR_ZERO
640  *
641  * and this case:
642  * r6 = 1
643  * call foo // uses callee's r6 inside to compute r0
644  * r0 += r6
645  * if r0 == 0 goto
646  *
647  * to track above reg_mask/stack_mask needs to be independent for each frame.
648  *
649  * Also if parent's curframe > frame where backtracking started,
650  * the verifier need to mark registers in both frames, otherwise callees
651  * may incorrectly prune callers. This is similar to
652  * commit 7640ead93924 ("bpf: verifier: make sure callees don't prune with caller differences")
653  *
654  * For now backtracking falls back into conservative marking.
655  */
656 void bpf_mark_all_scalars_precise(struct bpf_verifier_env *env,
657 				 struct bpf_verifier_state *st)
658 {
659 	struct bpf_func_state *func;
660 	struct bpf_reg_state *reg;
661 	int i, j;
662 
663 	if (env->log.level & BPF_LOG_LEVEL2) {
664 		verbose(env, "mark_precise: frame%d: falling back to forcing all scalars precise\n",
665 			st->curframe);
666 	}
667 
668 	/* big hammer: mark all scalars precise in this path.
669 	 * pop_stack may still get !precise scalars.
670 	 * We also skip current state and go straight to first parent state,
671 	 * because precision markings in current non-checkpointed state are
672 	 * not needed. See why in the comment in __mark_chain_precision below.
673 	 */
674 	for (st = st->parent; st; st = st->parent) {
675 		for (i = 0; i <= st->curframe; i++) {
676 			func = st->frame[i];
677 			for (j = 0; j < BPF_REG_FP; j++) {
678 				reg = &func->regs[j];
679 				if (reg->type != SCALAR_VALUE || reg->precise)
680 					continue;
681 				reg->precise = true;
682 				if (env->log.level & BPF_LOG_LEVEL2) {
683 					verbose(env, "force_precise: frame%d: forcing r%d to be precise\n",
684 						i, j);
685 				}
686 			}
687 			for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
688 				if (!bpf_is_spilled_reg(&func->stack[j]))
689 					continue;
690 				reg = &func->stack[j].spilled_ptr;
691 				if (reg->type != SCALAR_VALUE || reg->precise)
692 					continue;
693 				reg->precise = true;
694 				if (env->log.level & BPF_LOG_LEVEL2) {
695 					verbose(env, "force_precise: frame%d: forcing fp%d to be precise\n",
696 						i, -(j + 1) * 8);
697 				}
698 			}
699 		}
700 	}
701 }
702 
703 /*
704  * bpf_mark_chain_precision() backtracks BPF program instruction sequence and
705  * chain of verifier states making sure that register *regno* (if regno >= 0)
706  * and/or stack slot *spi* (if spi >= 0) are marked as precisely tracked
707  * SCALARS, as well as any other registers and slots that contribute to
708  * a tracked state of given registers/stack slots, depending on specific BPF
709  * assembly instructions (see backtrack_insns() for exact instruction handling
710  * logic). This backtracking relies on recorded jmp_history and is able to
711  * traverse entire chain of parent states. This process ends only when all the
712  * necessary registers/slots and their transitive dependencies are marked as
713  * precise.
714  *
715  * One important and subtle aspect is that precise marks *do not matter* in
716  * the currently verified state (current state). It is important to understand
717  * why this is the case.
718  *
719  * First, note that current state is the state that is not yet "checkpointed",
720  * i.e., it is not yet put into env->explored_states, and it has no children
721  * states as well. It's ephemeral, and can end up either a) being discarded if
722  * compatible explored state is found at some point or BPF_EXIT instruction is
723  * reached or b) checkpointed and put into env->explored_states, branching out
724  * into one or more children states.
725  *
726  * In the former case, precise markings in current state are completely
727  * ignored by state comparison code (see regsafe() for details). Only
728  * checkpointed ("old") state precise markings are important, and if old
729  * state's register/slot is precise, regsafe() assumes current state's
730  * register/slot as precise and checks value ranges exactly and precisely. If
731  * states turn out to be compatible, current state's necessary precise
732  * markings and any required parent states' precise markings are enforced
733  * after the fact with propagate_precision() logic, after the fact. But it's
734  * important to realize that in this case, even after marking current state
735  * registers/slots as precise, we immediately discard current state. So what
736  * actually matters is any of the precise markings propagated into current
737  * state's parent states, which are always checkpointed (due to b) case above).
738  * As such, for scenario a) it doesn't matter if current state has precise
739  * markings set or not.
740  *
741  * Now, for the scenario b), checkpointing and forking into child(ren)
742  * state(s). Note that before current state gets to checkpointing step, any
743  * processed instruction always assumes precise SCALAR register/slot
744  * knowledge: if precise value or range is useful to prune jump branch, BPF
745  * verifier takes this opportunity enthusiastically. Similarly, when
746  * register's value is used to calculate offset or memory address, exact
747  * knowledge of SCALAR range is assumed, checked, and enforced. So, similar to
748  * what we mentioned above about state comparison ignoring precise markings
749  * during state comparison, BPF verifier ignores and also assumes precise
750  * markings *at will* during instruction verification process. But as verifier
751  * assumes precision, it also propagates any precision dependencies across
752  * parent states, which are not yet finalized, so can be further restricted
753  * based on new knowledge gained from restrictions enforced by their children
754  * states. This is so that once those parent states are finalized, i.e., when
755  * they have no more active children state, state comparison logic in
756  * is_state_visited() would enforce strict and precise SCALAR ranges, if
757  * required for correctness.
758  *
759  * To build a bit more intuition, note also that once a state is checkpointed,
760  * the path we took to get to that state is not important. This is crucial
761  * property for state pruning. When state is checkpointed and finalized at
762  * some instruction index, it can be correctly and safely used to "short
763  * circuit" any *compatible* state that reaches exactly the same instruction
764  * index. I.e., if we jumped to that instruction from a completely different
765  * code path than original finalized state was derived from, it doesn't
766  * matter, current state can be discarded because from that instruction
767  * forward having a compatible state will ensure we will safely reach the
768  * exit. States describe preconditions for further exploration, but completely
769  * forget the history of how we got here.
770  *
771  * This also means that even if we needed precise SCALAR range to get to
772  * finalized state, but from that point forward *that same* SCALAR register is
773  * never used in a precise context (i.e., it's precise value is not needed for
774  * correctness), it's correct and safe to mark such register as "imprecise"
775  * (i.e., precise marking set to false). This is what we rely on when we do
776  * not set precise marking in current state. If no child state requires
777  * precision for any given SCALAR register, it's safe to dictate that it can
778  * be imprecise. If any child state does require this register to be precise,
779  * we'll mark it precise later retroactively during precise markings
780  * propagation from child state to parent states.
781  *
782  * Skipping precise marking setting in current state is a mild version of
783  * relying on the above observation. But we can utilize this property even
784  * more aggressively by proactively forgetting any precise marking in the
785  * current state (which we inherited from the parent state), right before we
786  * checkpoint it and branch off into new child state. This is done by
787  * mark_all_scalars_imprecise() to hopefully get more permissive and generic
788  * finalized states which help in short circuiting more future states.
789  */
790 int bpf_mark_chain_precision(struct bpf_verifier_env *env,
791 			    struct bpf_verifier_state *starting_state,
792 			    int regno,
793 			    bool *changed)
794 {
795 	struct bpf_verifier_state *st = starting_state;
796 	struct backtrack_state *bt = &env->bt;
797 	int first_idx = st->first_insn_idx;
798 	int last_idx = starting_state->insn_idx;
799 	int subseq_idx = -1;
800 	struct bpf_func_state *func;
801 	bool tmp, skip_first = true;
802 	struct bpf_reg_state *reg;
803 	int i, fr, err;
804 
805 	if (!env->bpf_capable)
806 		return 0;
807 
808 	changed = changed ?: &tmp;
809 	/* set frame number from which we are starting to backtrack */
810 	bt_init(bt, starting_state->curframe);
811 
812 	/* Do sanity checks against current state of register and/or stack
813 	 * slot, but don't set precise flag in current state, as precision
814 	 * tracking in the current state is unnecessary.
815 	 */
816 	func = st->frame[bt->frame];
817 	if (regno >= 0) {
818 		reg = &func->regs[regno];
819 		if (reg->type != SCALAR_VALUE) {
820 			verifier_bug(env, "backtracking misuse");
821 			return -EFAULT;
822 		}
823 		bt_set_reg(bt, regno);
824 	}
825 
826 	if (bt_empty(bt))
827 		return 0;
828 
829 	for (;;) {
830 		DECLARE_BITMAP(mask, 64);
831 		u32 history = st->jmp_history_cnt;
832 		struct bpf_jmp_history_entry *hist;
833 
834 		if (env->log.level & BPF_LOG_LEVEL2) {
835 			verbose(env, "mark_precise: frame%d: last_idx %d first_idx %d subseq_idx %d \n",
836 				bt->frame, last_idx, first_idx, subseq_idx);
837 		}
838 
839 		if (last_idx < 0) {
840 			/* we are at the entry into subprog, which
841 			 * is expected for global funcs, but only if
842 			 * requested precise registers are R1-R5
843 			 * (which are global func's input arguments)
844 			 */
845 			if (st->curframe == 0 &&
846 			    st->frame[0]->subprogno > 0 &&
847 			    st->frame[0]->callsite == BPF_MAIN_FUNC &&
848 			    bt_stack_mask(bt) == 0 &&
849 			    (bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) == 0) {
850 				bitmap_from_u64(mask, bt_reg_mask(bt));
851 				for_each_set_bit(i, mask, 32) {
852 					reg = &st->frame[0]->regs[i];
853 					bt_clear_reg(bt, i);
854 					if (reg->type == SCALAR_VALUE) {
855 						reg->precise = true;
856 						*changed = true;
857 					}
858 				}
859 				return 0;
860 			}
861 
862 			verifier_bug(env, "backtracking func entry subprog %d reg_mask %x stack_mask %llx",
863 				     st->frame[0]->subprogno, bt_reg_mask(bt), bt_stack_mask(bt));
864 			return -EFAULT;
865 		}
866 
867 		for (i = last_idx;;) {
868 			if (skip_first) {
869 				err = 0;
870 				skip_first = false;
871 			} else {
872 				hist = get_jmp_hist_entry(st, history, i);
873 				err = backtrack_insn(env, i, subseq_idx, hist, bt);
874 			}
875 			if (err == -ENOTSUPP) {
876 				bpf_mark_all_scalars_precise(env, starting_state);
877 				bt_reset(bt);
878 				return 0;
879 			} else if (err) {
880 				return err;
881 			}
882 			if (bt_empty(bt))
883 				/* Found assignment(s) into tracked register in this state.
884 				 * Since this state is already marked, just return.
885 				 * Nothing to be tracked further in the parent state.
886 				 */
887 				return 0;
888 			subseq_idx = i;
889 			i = get_prev_insn_idx(st, i, &history);
890 			if (i == -ENOENT)
891 				break;
892 			if (i >= env->prog->len) {
893 				/* This can happen if backtracking reached insn 0
894 				 * and there are still reg_mask or stack_mask
895 				 * to backtrack.
896 				 * It means the backtracking missed the spot where
897 				 * particular register was initialized with a constant.
898 				 */
899 				verifier_bug(env, "backtracking idx %d", i);
900 				return -EFAULT;
901 			}
902 		}
903 		st = st->parent;
904 		if (!st)
905 			break;
906 
907 		for (fr = bt->frame; fr >= 0; fr--) {
908 			func = st->frame[fr];
909 			bitmap_from_u64(mask, bt_frame_reg_mask(bt, fr));
910 			for_each_set_bit(i, mask, 32) {
911 				reg = &func->regs[i];
912 				if (reg->type != SCALAR_VALUE) {
913 					bt_clear_frame_reg(bt, fr, i);
914 					continue;
915 				}
916 				if (reg->precise) {
917 					bt_clear_frame_reg(bt, fr, i);
918 				} else {
919 					reg->precise = true;
920 					*changed = true;
921 				}
922 			}
923 
924 			bitmap_from_u64(mask, bt_frame_stack_mask(bt, fr));
925 			for_each_set_bit(i, mask, 64) {
926 				if (verifier_bug_if(i >= func->allocated_stack / BPF_REG_SIZE,
927 						    env, "stack slot %d, total slots %d",
928 						    i, func->allocated_stack / BPF_REG_SIZE))
929 					return -EFAULT;
930 
931 				if (!bpf_is_spilled_scalar_reg(&func->stack[i])) {
932 					bt_clear_frame_slot(bt, fr, i);
933 					continue;
934 				}
935 				reg = &func->stack[i].spilled_ptr;
936 				if (reg->precise) {
937 					bt_clear_frame_slot(bt, fr, i);
938 				} else {
939 					reg->precise = true;
940 					*changed = true;
941 				}
942 			}
943 			for (i = 0; i < func->out_stack_arg_cnt; i++) {
944 				if (!bt_is_frame_stack_arg_slot_set(bt, fr, i))
945 					continue;
946 				reg = &func->stack_arg_regs[i];
947 				if (reg->type != SCALAR_VALUE || reg->precise) {
948 					bt_clear_frame_stack_arg_slot(bt, fr, i);
949 				} else {
950 					reg->precise = true;
951 					*changed = true;
952 				}
953 			}
954 			if (env->log.level & BPF_LOG_LEVEL2) {
955 				fmt_reg_mask(env->tmp_str_buf, TMP_STR_BUF_LEN,
956 					     bt_frame_reg_mask(bt, fr));
957 				verbose(env, "mark_precise: frame%d: parent state regs=%s ",
958 					fr, env->tmp_str_buf);
959 				bpf_fmt_stack_mask(env->tmp_str_buf, TMP_STR_BUF_LEN,
960 					       bt_frame_stack_mask(bt, fr));
961 				verbose(env, "stack=%s: ", env->tmp_str_buf);
962 				print_verifier_state(env, st, fr, true);
963 			}
964 		}
965 
966 		if (bt_empty(bt))
967 			return 0;
968 
969 		subseq_idx = first_idx;
970 		last_idx = st->last_insn_idx;
971 		first_idx = st->first_insn_idx;
972 	}
973 
974 	/* if we still have requested precise regs or slots, we missed
975 	 * something (e.g., stack access through non-r10 register), so
976 	 * fallback to marking all precise
977 	 */
978 	if (!bt_empty(bt)) {
979 		bpf_mark_all_scalars_precise(env, starting_state);
980 		bt_reset(bt);
981 	}
982 
983 	return 0;
984 }
985