/* xref: /linux/tools/testing/selftests/bpf/progs/verifier_live_stack.c (revision aec2f682d47c54ef434b2d440992626d80b1ebdc) */
1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
3 
4 #include <linux/bpf.h>
5 #include <bpf/bpf_helpers.h>
6 #include "../../../include/linux/filter.h"
7 #include "bpf_misc.h"
8 
9 char _license[] SEC("license") = "GPL";
/* Single-entry hash map: used as a bpf_map_lookup_elem() target below. */
10 struct {
11 	__uint(type, BPF_MAP_TYPE_HASH);
12 	__uint(max_entries, 1);
13 	__type(key, int);
14 	__type(value, long long);
15 } map SEC(".maps");
16 
/* Single-entry array map with 8-byte values; array lookups are
 * value-sensitive to a constant-zero key (see STACK_ZERO tests below).
 */
17 struct {
18 	__uint(type, BPF_MAP_TYPE_ARRAY);
19 	__uint(max_entries, 1);
20 	__type(key, __u32);
21 	__type(value, __u64);
22 } array_map_8b SEC(".maps");
23 
/* NOTE(review): snprintf_u64_fmt is not referenced anywhere in this
 * chunk of the file — confirm it is used elsewhere or remove it.
 */
24 const char snprintf_u64_fmt[] = "%llu";
25 
/* Straight-line code: each stack load is reported as a "use" and each
 * store as a "def" of the touched fp0 slots in the level-2 log.
 */
26 SEC("socket")
27 __log_level(2)
28 __msg("0: (79) r1 = *(u64 *)(r10 -8)        ; use: fp0-8")
29 __msg("1: (79) r2 = *(u64 *)(r10 -24)       ; use: fp0-24")
30 __msg("2: (7b) *(u64 *)(r10 -8) = r1        ; def: fp0-8")
31 __naked void simple_read_simple_write(void)
32 {
33 	asm volatile (
34 	"r1 = *(u64 *)(r10 - 8);"
35 	"r2 = *(u64 *)(r10 - 24);"
36 	"*(u64 *)(r10 - 8) = r1;"
37 	"r0 = 0;"
38 	"exit;"
39 	::: __clobber_all);
40 }
41 
/* Two branches read different slots (fp-8 vs fp-16); the stores to
 * fp-32/fp-40 are never read back and therefore stay dead.
 */
42 SEC("socket")
43 __log_level(2)
44 __msg("2: (79) r0 = *(u64 *)(r10 -8)        ; use: fp0-8")
45 __msg("6: (79) r0 = *(u64 *)(r10 -16)       ; use: fp0-16")
46 __naked void read_write_join(void)
47 {
48 	asm volatile (
49 	"call %[bpf_get_prandom_u32];"
50 	"if r0 > 42 goto 1f;"
51 	"r0 = *(u64 *)(r10 - 8);"
52 	"*(u64 *)(r10 - 32) = r0;"
53 	"*(u64 *)(r10 - 40) = r0;"
54 	"exit;"
55 "1:"
56 	"r0 = *(u64 *)(r10 - 16);"
57 	"*(u64 *)(r10 - 32) = r0;"
58 	"exit;"
59 	:: __imm(bpf_get_prandom_u32)
60 	: __clobber_all);
61 }
62 
/* The store goes through r2 = r10 + r1 where r1 is -8 on one path and
 * -16 on the other: a variable-offset store cannot be treated as a
 * must-write ("def") of a single slot, so the log line for insn 6
 * carries no "def:" annotation.
 */
63 SEC("socket")
64 __log_level(2)
65 __msg("stack use/def subprog#0 must_write_not_same_slot (d0,cs0):")
66 __msg("6: (7b) *(u64 *)(r2 +0) = r0{{$}}")
67 __msg("Live regs before insn:")
68 __naked void must_write_not_same_slot(void)
69 {
70 	asm volatile (
71 	"call %[bpf_get_prandom_u32];"
72 	"r1 = -8;"
73 	"if r0 > 42 goto 1f;"
74 	"r1 = -16;"
75 "1:"
76 	"r2 = r10;"
77 	"r2 += r1;"
78 	"*(u64 *)(r2 + 0) = r0;"
79 	"exit;"
80 	:: __imm(bpf_get_prandom_u32)
81 	: __clobber_all);
82 }
83 
84 SEC("socket")
85 __log_level(2)
86 __msg("0: (7a) *(u64 *)(r10 -8) = 0         ; def: fp0-8")
87 __msg("5: (85) call bpf_map_lookup_elem#1   ; use: fp0-8h")
88 __naked void must_write_not_same_type(void)
89 {
90 	asm volatile (
91 	"*(u64*)(r10 - 8) = 0;"
92 	"r2 = r10;"
93 	"r2 += -8;"
94 	"r1 = %[map] ll;"
95 	"call %[bpf_map_lookup_elem];"
96 	"if r0 != 0 goto 1f;"
97 	"r0 = r10;"
98 	"r0 += -16;"
99 "1:"
100 	"*(u64 *)(r0 + 0) = 42;"
101 	"exit;"
102 	:
103         : __imm(bpf_get_prandom_u32),
104 	  __imm(bpf_map_lookup_elem),
105 	  __imm_addr(map)
106 	: __clobber_all);
107 }
108 
109 SEC("socket")
110 __log_level(2)
111 /* Callee writes fp[0]-8: stack_use at call site has slots 0,1 live */
112 __msg("stack use/def subprog#0 caller_stack_write (d0,cs0):")
113 __msg("2: (85) call pc+1{{$}}")
114 __msg("stack use/def subprog#1 write_first_param (d1,cs2):")
115 __msg("4: (7a) *(u64 *)(r1 +0) = 7          ; def: fp0-8")
116 __naked void caller_stack_write(void)
117 {
118 	asm volatile (
119 	"r1 = r10;"
120 	"r1 += -8;"
121 	"call write_first_param;"
122 	"exit;"
123 	::: __clobber_all);
124 }
125 
/* Callee: stores 7 through r1, which the caller above points at its
 * own fp-8, so the "def" is attributed to the caller's frame (fp0-8).
 */
126 static __used __naked void write_first_param(void)
127 {
128 	asm volatile (
129 	"*(u64 *)(r1 + 0) = 7;"
130 	"r0 = 0;"
131 	"exit;"
132 	::: __clobber_all);
133 }
134 
/* Same shape for reads: read_first_param is called twice with r1
 * pointing at fp-8 and fp-16, producing per-callsite (cs2/cs5) use
 * reports attributed to different caller slots.
 */
135 SEC("socket")
136 __log_level(2)
137 __msg("stack use/def subprog#0 caller_stack_read (d0,cs0):")
138 __msg("2: (85) call pc+{{.*}}                   ; use: fp0-8{{$}}")
139 __msg("5: (85) call pc+{{.*}}                   ; use: fp0-16{{$}}")
140 __msg("stack use/def subprog#1 read_first_param (d1,cs2):")
141 __msg("7: (79) r0 = *(u64 *)(r1 +0)         ; use: fp0-8{{$}}")
142 __msg("8: (95) exit")
143 __msg("stack use/def subprog#1 read_first_param (d1,cs5):")
144 __msg("7: (79) r0 = *(u64 *)(r1 +0)         ; use: fp0-16{{$}}")
145 __msg("8: (95) exit")
146 __naked void caller_stack_read(void)
147 {
148 	asm volatile (
149 	"r1 = r10;"
150 	"r1 += -8;"
151 	"call read_first_param;"
152 	"r1 = r10;"
153 	"r1 += -16;"
154 	"call read_first_param;"
155 	"exit;"
156 	::: __clobber_all);
157 }
158 
/* Callee: loads through r1 (caller stack pointer); value is unused. */
159 static __used __naked void read_first_param(void)
160 {
161 	asm volatile (
162 	"r0 = *(u64 *)(r1 + 0);"
163 	"exit;"
164 	::: __clobber_all);
165 }
166 
/* Subprog argument tracking must converge when r1/r2 join at label 2;
 * r0 is overwritten with 0 before exit, so neither value escapes.
 * The test only checks that verification succeeds.
 */
167 SEC("socket")
168 __success
169 __naked void arg_track_join_convergence(void)
170 {
171 	asm volatile (
172 	"r1 = 1;"
173 	"r2 = 2;"
174 	"call arg_track_join_convergence_subprog;"
175 	"r0 = 0;"
176 	"exit;"
177 	::: __clobber_all);
178 }
179 
/* Two paths copy different argument registers into r0 and then merge. */
180 static __used __naked void arg_track_join_convergence_subprog(void)
181 {
182 	asm volatile (
183 	"if r1 == 0 goto 1f;"
184 	"r0 = r1;"
185 	"goto 2f;"
186 "1:"
187 	"r0 = r2;"
188 "2:"
189 	"r0 = 0;"
190 	"exit;"
191 	::: __clobber_all);
192 }
193 
194 SEC("socket")
195 __flag(BPF_F_TEST_STATE_FREQ)
196 __log_level(2)
197 /* fp0-8 consumed at insn 9, dead by insn 11. stack_def at insn 4 kills slots 0,1. */
198 __msg("4: (7b) *(u64 *)(r10 -8) = r0        ; def: fp0-8")
199 /* stack_use at call site: callee reads fp0-8, slots 0,1 live */
200 __msg("7: (85) call pc+{{.*}}               ; use: fp0-8")
201 /* read_first_param2: no caller stack live inside callee after first read */
202 __msg("9: (79) r0 = *(u64 *)(r1 +0)         ; use: fp0-8")
203 __msg("10: (b7) r0 = 0{{$}}")
204 __msg("11: (05) goto pc+0{{$}}")
205 __msg("12: (95) exit")
206 /*
207  * Checkpoint at goto +0 fires because fp0-8 is dead → state pruning.
208  */
209 __msg("12: safe")
210 __naked void caller_stack_pruning(void)
211 {
212 	asm volatile (
213 	"call %[bpf_get_prandom_u32];"
214 	"if r0 == 42 goto 1f;"
215 	"r0 = %[map] ll;"
216 "1:"
217 	"*(u64 *)(r10 - 8) = r0;"
218 	"r1 = r10;"
219 	"r1 += -8;"
220 	/*
221 	 * fp[0]-8 is either pointer to map or a scalar,
222 	 * preventing state pruning at checkpoint created for call.
223 	 */
224 	"call read_first_param2;"
225 	"exit;"
226 	:
227 	: __imm(bpf_get_prandom_u32),
228 	  __imm_addr(map)
229 	: __clobber_all);
230 }
231 
/* Callee: consumes caller fp[0]-8 once, then hits a goto +0 checkpoint
 * where that slot is already dead, allowing the caller's two states to
 * prune against each other.
 */
232 static __used __naked void read_first_param2(void)
233 {
234 	asm volatile (
235 	"r0 = *(u64 *)(r1 + 0);"
236 	"r0 = 0;"
237 	/*
238 	 * Checkpoint at goto +0 should fire,
239 	 * as caller stack fp[0]-8 is not alive at this point.
240 	 */
241 	"goto +0;"
242 	"exit;"
243 	::: __clobber_all);
244 }
245 
246 SEC("socket")
247 __flag(BPF_F_TEST_STATE_FREQ)
248 __failure
249 __msg("R1 type=scalar expected=map_ptr")
250 __naked void caller_stack_pruning_callback(void)
251 {
252 	asm volatile (
253 	"r0 = %[map] ll;"
254 	"*(u64 *)(r10 - 8) = r0;"
255 	"r1 = 2;"
256 	"r2 = loop_cb ll;"
257 	"r3 = r10;"
258 	"r3 += -8;"
259 	"r4 = 0;"
260 	/*
261 	 * fp[0]-8 is either pointer to map or a scalar,
262 	 * preventing state pruning at checkpoint created for call.
263 	 */
264 	"call %[bpf_loop];"
265 	"r0 = 42;"
266 	"exit;"
267 	:
268 	: __imm(bpf_get_prandom_u32),
269 	  __imm(bpf_loop),
270 	  __imm_addr(map)
271 	: __clobber_all);
272 }
273 
274 static __used __naked void loop_cb(void)
275 {
276 	asm volatile (
277 	/*
278 	 * Checkpoint at function entry should not fire, as caller
279 	 * stack fp[0]-8 is alive at this point.
280 	 */
281 	"r6 = r2;"
282 	"r1 = *(u64 *)(r6 + 0);"
283 	"*(u64*)(r10 - 8) = 7;"
284 	"r2 = r10;"
285 	"r2 += -8;"
286 	"call %[bpf_map_lookup_elem];"
287 	/*
288 	 * This should stop verifier on a second loop iteration,
289 	 * but only if verifier correctly maintains that fp[0]-8
290 	 * is still alive.
291 	 */
292 	"*(u64 *)(r6 + 0) = 0;"
293 	"r0 = 0;"
294 	"exit;"
295 	:
296 	: __imm(bpf_map_lookup_elem),
297 	  __imm(bpf_get_prandom_u32)
298 	: __clobber_all);
299 }
300 
301 /*
302  * Because of a bug in verifier.c:compute_postorder()
303  * the program below overflowed traversal queue in that function.
304  */
305 SEC("socket")
306 __naked void syzbot_postorder_bug1(void)
307 {
308 	asm volatile (
309 	"r0 = 0;"
310 	"if r0 != 0 goto -1;"
311 	"exit;"
312 	::: __clobber_all);
313 }
314 
315 struct {
316         __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
317         __uint(max_entries, 1);
318         __type(key, __u32);
319         __type(value, __u32);
320 } map_array SEC(".maps");
321 
322 SEC("socket")
323 __failure __msg("invalid read from stack R2 off=-1024 size=8")
324 __flag(BPF_F_TEST_STATE_FREQ)
325 __naked unsigned long caller_stack_write_tail_call(void)
326 {
327         asm volatile (
328 	"r6 = r1;"
329 	"*(u64 *)(r10 - 8) = -8;"
330         "call %[bpf_get_prandom_u32];"
331         "if r0 != 42 goto 1f;"
332         "goto 2f;"
333   "1:"
334         "*(u64 *)(r10 - 8) = -1024;"
335   "2:"
336         "r1 = r6;"
337         "r2 = r10;"
338         "r2 += -8;"
339         "call write_tail_call;"
340         "r1 = *(u64 *)(r10 - 8);"
341         "r2 = r10;"
342         "r2 += r1;"
343         "r0 = *(u64 *)(r2 + 0);"
344         "exit;"
345         :: __imm(bpf_get_prandom_u32)
346 	: __clobber_all);
347 }
348 
349 static __used __naked unsigned long write_tail_call(void)
350 {
351         asm volatile (
352         "r6 = r2;"
353         "r2 = %[map_array] ll;"
354         "r3 = 0;"
355         "call %[bpf_tail_call];"
356         "*(u64 *)(r6 + 0) = -16;"
357         "r0 = 0;"
358         "exit;"
359 	:
360 	: __imm(bpf_tail_call),
361           __imm_addr(map_array)
362         : __clobber_all);
363 }
364 
365 /* Test precise subprog stack access analysis.
366  * Caller passes fp-32 (SPI 3) to callee that only accesses arg+0 and arg+8
367  * (SPIs 3 and 2). Slots 0 and 1 should NOT be live at the call site.
368  *
369  * Insn layout:
370  *   0: *(u64*)(r10 - 8) = 0      write SPI 0
371  *   1: *(u64*)(r10 - 16) = 0     write SPI 1
372  *   2: *(u64*)(r10 - 24) = 0     write SPI 2
373  *   3: *(u64*)(r10 - 32) = 0     write SPI 3
374  *   4: r1 = r10
375  *   5: r1 += -32
376  *   6: call precise_read_two      passes fp-32 (SPI 3)
377  *   7: r0 = 0
378  *   8: exit
379  *
380  * At insn 6 only SPIs 2,3 should be live (slots 4-7, 0xf0).
381  * SPIs 0,1 are written but never read → dead.
382  */
383 SEC("socket")
384 __log_level(2)
385 __msg("6: (85) call pc+{{.*}}                   ; use: fp0-24 fp0-32{{$}}")
386 __naked void subprog_precise_stack_access(void)
387 {
388 	asm volatile (
389 	"*(u64 *)(r10 - 8) = 0;"
390 	"*(u64 *)(r10 - 16) = 0;"
391 	"*(u64 *)(r10 - 24) = 0;"
392 	"*(u64 *)(r10 - 32) = 0;"
393 	"r1 = r10;"
394 	"r1 += -32;"
395 	"call precise_read_two;"
396 	"r0 = 0;"
397 	"exit;"
398 	::: __clobber_all);
399 }
400 
401 /* Callee reads only at arg+0 (SPI 3) and arg+8 (SPI 2) */
/* The loaded values are discarded (r0 reset to 0); only the access
 * pattern matters for the liveness expectation above.
 */
402 static __used __naked void precise_read_two(void)
403 {
404 	asm volatile (
405 	"r0 = *(u64 *)(r1 + 0);"
406 	"r2 = *(u64 *)(r1 + 8);"
407 	"r0 = 0;"
408 	"exit;"
409 	::: __clobber_all);
410 }
411 
412 /* Test that multi-level subprog calls (callee passes arg-derived ptr
413  * to another BPF subprog) are analyzed precisely.
414  *
415  * Caller passes fp-32 (SPI 3). The callee forwards it to inner_callee.
416  * inner_callee only reads at offset 0 from the pointer.
417  * The analysis recurses into forward_to_inner -> inner_callee and
418  * determines only SPI 3 is accessed (slots 6-7, 0xc0), not all of SPIs 0-3.
419  *
420  * Insn layout:
421  *   0: *(u64*)(r10 - 8) = 0      write SPI 0
422  *   1: *(u64*)(r10 - 16) = 0     write SPI 1
423  *   2: *(u64*)(r10 - 24) = 0     write SPI 2
424  *   3: *(u64*)(r10 - 32) = 0     write SPI 3
425  *   4: r1 = r10
426  *   5: r1 += -32
427  *   6: call forward_to_inner      passes fp-32 (SPI 3)
428  *   7: r0 = 0
429  *   8: exit
430  */
/* NOTE(review): despite the "_conservative" suffix, the expected result
 * below is the precise one (only fp0-32 used); the name may be
 * historical — confirm against the test's intent.
 */
431 SEC("socket")
432 __log_level(2)
433 __msg("6: (85) call pc+{{.*}}                   ; use: fp0-32{{$}}")
434 __naked void subprog_multilevel_conservative(void)
435 {
436 	asm volatile (
437 	"*(u64 *)(r10 - 8) = 0;"
438 	"*(u64 *)(r10 - 16) = 0;"
439 	"*(u64 *)(r10 - 24) = 0;"
440 	"*(u64 *)(r10 - 32) = 0;"
441 	"r1 = r10;"
442 	"r1 += -32;"
443 	"call forward_to_inner;"
444 	"r0 = 0;"
445 	"exit;"
446 	::: __clobber_all);
447 }
448 
449 /* Forwards arg to another subprog */
450 static __used __naked void forward_to_inner(void)
451 {
452 	asm volatile (
453 	"call inner_callee;"
454 	"r0 = 0;"
455 	"exit;"
456 	::: __clobber_all);
457 }
458 
/* Innermost callee: reads only at offset 0 from the forwarded pointer. */
459 static __used __naked void inner_callee(void)
460 {
461 	asm volatile (
462 	"r0 = *(u64 *)(r1 + 0);"
463 	"r0 = 0;"
464 	"exit;"
465 	::: __clobber_all);
466 }
467 
468 /* Test multi-frame precision loss: callee consumes caller stack early,
469  * but static liveness keeps it live at pruning points inside callee.
470  *
471  * Caller stores map_ptr or scalar(42) at fp-8, then calls
472  * consume_and_call_inner. The callee reads fp0-8 at entry (consuming
473  * the slot), then calls do_nothing2. After do_nothing2 returns (a
474  * pruning point), fp-8 should be dead -- the read already happened.
475  * But because the call instruction's stack_use includes SPI 0, the
476  * static live_stack_before at insn 7 is 0x1, keeping fp-8 live inside
477  * the callee and preventing state pruning between the two paths.
478  *
479  * Insn layout:
480  *   0: call bpf_get_prandom_u32
481  *   1: if r0 == 42 goto pc+2    -> insn 4
482  *   2: r0 = map ll (ldimm64 part1)
483  *   3: (ldimm64 part2)
484  *   4: *(u64)(r10 - 8) = r0     fp-8 = map_ptr OR scalar(42)
485  *   5: r1 = r10
486  *   6: r1 += -8
487  *   7: call consume_and_call_inner
488  *   8: r0 = 0
489  *   9: exit
490  *
491  * At insn 7, live_stack_before = 0x3 (slots 0-1 live due to stack_use).
492  * At insn 8, live_stack_before = 0x0 (SPI 0 dead, caller doesn't need it).
493  */
494 SEC("socket")
495 __flag(BPF_F_TEST_STATE_FREQ)
496 __log_level(2)
497 __success
498 __msg(" 7: (85) call pc+{{.*}}                   ; use: fp0-8")
499 __msg(" 8: {{.*}} (b7)")
500 __naked void callee_consumed_caller_stack(void)
501 {
502 	asm volatile (
503 	"call %[bpf_get_prandom_u32];"
504 	"if r0 == 42 goto 1f;"
505 	"r0 = %[map] ll;"
506 "1:"
507 	"*(u64 *)(r10 - 8) = r0;"
508 	"r1 = r10;"
509 	"r1 += -8;"
510 	"call consume_and_call_inner;"
511 	"r0 = 0;"
512 	"exit;"
513 	:
514 	: __imm(bpf_get_prandom_u32),
515 	  __imm_addr(map)
516 	: __clobber_all);
517 }
518 
/* Callee: reads the caller slot once, then creates several checkpoints
 * via goto +0 after the slot's value is no longer needed.
 */
519 static __used __naked void consume_and_call_inner(void)
520 {
521 	asm volatile (
522 	"r0 = *(u64 *)(r1 + 0);"	/* read fp[0]-8 into caller-saved r0 */
523 	"call do_nothing2;"		/* inner call clobbers r0 */
524 	"r0 = 0;"
525 	"goto +0;"			/* checkpoint */
526 	"r0 = 0;"
527 	"goto +0;"			/* checkpoint */
528 	"r0 = 0;"
529 	"goto +0;"			/* checkpoint */
530 	"r0 = 0;"
531 	"goto +0;"			/* checkpoint */
532 	"exit;"
533 	::: __clobber_all);
534 }
535 
/* Leaf callee: only writes r0; exists to create a post-call pruning
 * point inside consume_and_call_inner (see comment above).
 */
536 static __used __naked void do_nothing2(void)
537 {
538 	asm volatile (
539 	"r0 = 0;"
540 	"r0 = 0;"
541 	"r0 = 0;"
542 	"r0 = 0;"
543 	"r0 = 0;"
544 	"r0 = 0;"
545 	"r0 = 0;"
546 	"exit;"
547 	::: __clobber_all);
548 }
549 
550 /*
551  * Reproducer for unsound pruning when clean_verifier_state() promotes
552  * live STACK_ZERO bytes to STACK_MISC.
553  *
554  * Program shape:
555  * - Build key at fp-4:
556  *   - path A keeps key byte as STACK_ZERO;
557  *   - path B writes unknown byte making it STACK_MISC.
558  * - Branches merge at a prune point before map_lookup.
559  * - map_lookup on ARRAY map is value-sensitive to constant zero key:
560  *   - path A: const key 0 => PTR_TO_MAP_VALUE (non-NULL);
561  *   - path B: non-const key => PTR_TO_MAP_VALUE_OR_NULL.
562  * - Dereference lookup result without null check.
563  *
564  * Note this behavior won't trigger at fp-8, since the verifier will
565  * track 32-bit scalar spill differently as spilled_ptr.
566  *
567  * Correct verifier behavior: reject (path B unsafe).
568  * With blanket STACK_ZERO->STACK_MISC promotion on live slots, cached path A
569  * state can be generalized and incorrectly prune path B, making program load.
570  */
571 SEC("socket")
572 __flag(BPF_F_TEST_STATE_FREQ)
573 __failure __msg("R0 invalid mem access 'map_value_or_null'")
574 __naked void stack_zero_to_misc_unsound_array_lookup(void)
575 {
576 	asm volatile (
577 	/* key at fp-4: all bytes STACK_ZERO */
578 	"*(u32 *)(r10 - 4) = 0;"
579 	"call %[bpf_get_prandom_u32];"
580 	/* fall-through (path A) explored first */
581 	"if r0 != 0 goto l_nonconst%=;"
582 	/* path A: keep key constant zero */
583 	"goto l_lookup%=;"
584 "l_nonconst%=:"
585 	/* path B: key byte turns to STACK_MISC, key no longer const */
586 	"*(u8 *)(r10 - 4) = r0;"
587 "l_lookup%=:"
588 	/* value-sensitive lookup */
589 	"r2 = r10;"
590 	"r2 += -4;"
591 	"r1 = %[array_map_8b] ll;"
592 	"call %[bpf_map_lookup_elem];"
593 	/* unsafe when lookup result is map_value_or_null */
594 	"r0 = *(u64 *)(r0 + 0);"
595 	"exit;"
596 	:
597 	: __imm(bpf_get_prandom_u32),
598 	  __imm(bpf_map_lookup_elem),
599 	  __imm_addr(array_map_8b)
600 	: __clobber_all);
601 }
602 
603 /*
604  * Subprog variant of stack_zero_to_misc_unsound_array_lookup.
605  *
606  * Check unsound pruning when a callee modifies the caller's
607  * stack through a pointer argument.
608  *
609  * Program shape:
610  *   main:
611  *     *(u32)(fp - 4) = 0            key = 0 (all bytes STACK_ZERO)
612  *     r1 = fp - 4
613  *     call maybe_clobber_key        may overwrite key[0] with scalar
614  *     <-- prune point: two states meet here -->
615  *     r2 = fp - 4
616  *     r1 = array_map_8b
617  *     call bpf_map_lookup_elem      value-sensitive on const-zero key
618  *     r0 = *(u64)(r0 + 0)           deref without null check
619  *     exit
620  *
621  *   maybe_clobber_key(r1):
622  *     r6 = r1                       save &key
623  *     call bpf_get_prandom_u32
624  *     if r0 == 0 goto skip          path A: key stays STACK_ZERO
625  *     *(u8)(r6 + 0) = r0            path B: key[0] becomes STACK_MISC
626  *   skip:
627  *     r0 = 0
628  *     exit
629  *
630  * Path A: const-zero key => array lookup => PTR_TO_MAP_VALUE => deref OK.
631  * Path B: non-const key  => array lookup => PTR_TO_MAP_VALUE_OR_NULL => UNSAFE.
632  *
633  * If the cleaner collapses STACK_ZERO -> STACK_MISC for the live key
634  * slot, path A's cached state matches path B, pruning the unsafe path.
635  *
636  * Correct verifier behaviour: reject.
637  */
638 SEC("socket")
639 __flag(BPF_F_TEST_STATE_FREQ)
640 __failure __msg("R0 invalid mem access 'map_value_or_null'")
641 __naked void subprog_stack_zero_to_misc_unsound(void)
642 {
643 	asm volatile (
644 	/* key at fp-4: all bytes STACK_ZERO */
645 	"*(u32 *)(r10 - 4) = 0;"
646 	/* subprog may clobber key[0] with a scalar byte */
647 	"r1 = r10;"
648 	"r1 += -4;"
649 	"call maybe_clobber_key;"
650 	/* value-sensitive array lookup */
651 	"r2 = r10;"
652 	"r2 += -4;"
653 	"r1 = %[array_map_8b] ll;"
654 	"call %[bpf_map_lookup_elem];"
655 	/* unsafe when result is map_value_or_null (path B) */
656 	"r0 = *(u64 *)(r0 + 0);"
657 	"exit;"
658 	:
659 	: __imm(bpf_map_lookup_elem),
660 	  __imm_addr(array_map_8b)
661 	: __clobber_all);
662 }
663 
664 static __used __naked void maybe_clobber_key(void)
665 {
666 	asm volatile (
667 	"r6 = r1;"
668 	"call %[bpf_get_prandom_u32];"
669 	/* path A (r0==0): key stays STACK_ZERO, explored first */
670 	"if r0 == 0 goto 1f;"
671 	/* path B (r0!=0): overwrite key[0] with scalar */
672 	"*(u8 *)(r6 + 0) = r0;"
673 	"1:"
674 	"r0 = 0;"
675 	"exit;"
676 	:: __imm(bpf_get_prandom_u32)
677 	: __clobber_all);
678 }
679 
680 /*
681  * Demonstrate that subprog arg spill/reload breaks arg tracking,
682  * inflating caller stack liveness and preventing state pruning.
683  *
684  * modifier2(fp-24) has two paths: one writes a scalar to *(r1+8)
685  * = caller fp-16, the other leaves it as zero.  After modifier2
686  * returns, fp-16 is never read again — it is dead.
687  *
688  * spill_reload_reader2(fp-24) only reads caller fp-8 via
689  * *(r1+16), but it spills r1 across a helper call.  This
690  * breaks compute_subprog_arg_access(): the reload from callee
691  * stack cannot be connected back to arg1, so arg1 access goes
692  * "all (conservative)".  At the call site (r1 = fp-24, slot 5)
693  * apply_callee_stack_access() marks slots 0..5 as stack_use —
694  * pulling fp-16 (slots 2-3) into live_stack_before even though
695  * the reader never touches it.
696  *
697  * Result: at modifier2's return point two states with different
698  * fp-16 values cannot be pruned.
699  *
700  * With correct (or old dynamic) liveness fp-16 is dead at that
701  * point and the states prune → "6: safe" appears in the log.
702  */
703 SEC("socket")
704 __flag(BPF_F_TEST_STATE_FREQ)
705 __log_level(2)
706 __success
707 __msg("6: safe")
708 __naked void spill_reload_inflates_stack_liveness(void)
709 {
710 	asm volatile (
711 	/* struct at fp-24: { ctx; ptr; tail; } */
712 	"*(u64 *)(r10 - 24) = r1;"		/* fp-24 = ctx */
713 	"*(u64 *)(r10 - 16) = r1;"		/* fp-16 = ctx (STACK_SPILL ptr) */
714 	"*(u64 *)(r10 - 8) = 0;"		/* fp-8  = tail */
715 	/* modifier2 writes different values to fp-16 on two paths */
716 	"r1 = r10;"
717 	"r1 += -24;"
718 	"call modifier2;"
719 	/* insn 6: prune point — two states with different fp-16
720 	 * path A: fp-16 = STACK_MISC  (scalar overwrote pointer)
721 	 * path B: fp-16 = STACK_SPILL (original ctx pointer)
722 	 * STACK_MISC does NOT subsume STACK_SPILL(ptr),
723 	 * so pruning fails unless fp-16 is cleaned (dead).
724 	 */
725 	"r1 = r10;"
726 	"r1 += -24;"
727 	"call spill_reload_reader2;"		/* reads fp-8 via *(r1+16) */
728 	"r0 = 0;"
729 	"exit;"
730 	::: __clobber_all);
731 }
732 
733 /* Two paths: one writes a scalar to *(r1+8) = caller fp-16,
734  * the other leaves it unchanged.  Both return 0 via separate
735  * exits to prevent pruning inside the subprog at the merge.
736  */
737 static __used __naked void modifier2(void)
738 {
739 	asm volatile (
740 	"r6 = r1;"
741 	"call %[bpf_get_prandom_u32];"
742 	"if r0 == 0 goto 1f;"
743 	"*(u64 *)(r6 + 8) = r0;"		/* fp-16 = random */
744 	"r0 = 0;"
745 	"exit;"					/* path A exit */
746 	"1:"
747 	"r0 = 0;"
748 	"exit;"					/* path B exit */
749 	:: __imm(bpf_get_prandom_u32)
750 	: __clobber_all);
751 }
752 
753 /* Receives r1 = caller fp-24.  Only reads *(r1+16) = fp-8.
754  * Spills r1 across a helper call → arg tracking goes conservative →
755  * slots 0..5 all appear used instead of just slot 1 (fp-8).
756  */
757 static __used __naked void spill_reload_reader2(void)
758 {
759 	asm volatile (
760 	"*(u64 *)(r10 - 8) = r1;"		/* spill arg1 */
761 	"call %[bpf_get_prandom_u32];"		/* clobbers r1-r5 */
762 	"r1 = *(u64 *)(r10 - 8);"		/* reload arg1 */
763 	"r0 = *(u64 *)(r1 + 16);"		/* read caller fp-8 */
764 	"r0 = 0;"
765 	"exit;"
766 	:: __imm(bpf_get_prandom_u32)
767 	: __clobber_all);
768 }
769 
770 /* BTF FUNC records are not generated for kfuncs referenced
771  * from inline assembly. These records are necessary for
772  * libbpf to link the program. The function below is a hack
773  * to ensure that BTF FUNC records are generated.
774  */
/* Not intended to be executed; its only purpose is to make the compiler
 * emit BTF FUNC records for the iterator kfuncs (see comment above).
 */
775 void __kfunc_btf_root(void)
776 {
777 	bpf_iter_num_new(0, 0, 0);
778 	bpf_iter_num_next(0);
779 	bpf_iter_num_destroy(0);
780 }
781 
782 /* Test that open-coded iterator kfunc arguments get precise stack
783  * liveness tracking. struct bpf_iter_num is 8 bytes (1 SPI).
784  *
785  * Insn layout:
786  *   0: *(u64*)(r10 - 8) = 0      write SPI 0 (dead)
787  *   1: *(u64*)(r10 - 16) = 0     write SPI 1 (dead)
788  *   2: r1 = r10
789  *   3: r1 += -24                 iter state at fp-24 (SPI 2)
790  *   4: r2 = 0
791  *   5: r3 = 10
792  *   6: call bpf_iter_num_new     defines SPI 2 (KF_ITER_NEW) → 0x0
793  *   7-8: r1 = fp-24
794  *   9: call bpf_iter_num_next    uses SPI 2 → 0x30
795  *  10: if r0 == 0 goto 2f
796  *  11: goto 1b
797  *  12-13: r1 = fp-24
798  *  14: call bpf_iter_num_destroy uses SPI 2 → 0x30
799  *  15: r0 = 0
800  *  16: exit
801  *
802  * At insn 6, SPI 2 is defined (KF_ITER_NEW initializes, doesn't read),
803  * so it kills liveness from successors. live_stack_before = 0x0.
804  * At insns 9 and 14, SPI 2 is used (iter_next/destroy read the state),
805  * so live_stack_before = 0x30.
806  */
807 SEC("socket")
808 __success __log_level(2)
809 __msg(" 6: (85) call bpf_iter_num_new{{.*}}          ; def: fp0-24{{$}}")
810 __msg(" 9: (85) call bpf_iter_num_next{{.*}}         ; use: fp0-24{{$}}")
811 __msg("14: (85) call bpf_iter_num_destroy{{.*}}      ; use: fp0-24{{$}}")
812 __naked void kfunc_iter_stack_liveness(void)
813 {
814 	asm volatile (
815 	"*(u64 *)(r10 - 8) = 0;"	/* SPI 0 - dead */
816 	"*(u64 *)(r10 - 16) = 0;"	/* SPI 1 - dead */
817 	"r1 = r10;"
818 	"r1 += -24;"
819 	"r2 = 0;"
820 	"r3 = 10;"
821 	"call %[bpf_iter_num_new];"
822 "1:"
823 	"r1 = r10;"
824 	"r1 += -24;"
825 	"call %[bpf_iter_num_next];"
826 	"if r0 == 0 goto 2f;"
827 	"goto 1b;"			/* loop back until iterator is exhausted */
828 "2:"
829 	"r1 = r10;"
830 	"r1 += -24;"
831 	"call %[bpf_iter_num_destroy];"
832 	"r0 = 0;"
833 	"exit;"
834 	:: __imm(bpf_iter_num_new),
835 	   __imm(bpf_iter_num_next),
836 	   __imm(bpf_iter_num_destroy)
837 	: __clobber_all);
838 }
839 
840 /*
841  * Test for soundness bug in static stack liveness analysis.
842  *
843  * The static pre-pass tracks FP-derived register offsets to determine
844  * which stack slots are accessed. When a PTR_TO_STACK is spilled to
845  * the stack and later reloaded, the reload (BPF_LDX) kills FP-derived
846  * tracking, making subsequent accesses through the reloaded pointer
847  * invisible to the static analysis.
848  *
849  * This causes the analysis to incorrectly mark SPI 0 as dead at the
850  * merge point. clean_verifier_state() zeros it in the cached state,
851  * and stacksafe() accepts the new state against STACK_INVALID,
852  * enabling incorrect pruning.
853  *
854  * Path A (verified first): stores PTR_TO_MAP_VALUE in SPI 0
855  * Path B (verified second): stores scalar 42 in SPI 0
856  * After merge: reads SPI 0 through spilled/reloaded PTR_TO_STACK
857  * and dereferences the result as a pointer.
858  *
859  * Correct behavior: reject (path B dereferences a scalar)
860  * Bug behavior: accept (path B is incorrectly pruned)
861  */
862 SEC("socket")
863 __flag(BPF_F_TEST_STATE_FREQ)
864 __failure __msg("R0 invalid mem access 'scalar'")
865 __naked void spill_ptr_liveness_type_confusion(void)
866 {
867 	asm volatile (
868 	/* Map lookup to get PTR_TO_MAP_VALUE */
869 	"r1 = %[map] ll;"
870 	"*(u32 *)(r10 - 32) = 0;"
871 	"r2 = r10;"
872 	"r2 += -32;"
873 	"call %[bpf_map_lookup_elem];"
874 	"if r0 == 0 goto l_exit%=;"
875 	/* r6 = PTR_TO_MAP_VALUE (callee-saved) */
876 	"r6 = r0;"
877 	/* Branch: fall-through (path A) verified first */
878 	"call %[bpf_get_prandom_u32];"
879 	"if r0 != 0 goto l_scalar%=;"
880 	/* Path A: store map value ptr at SPI 0 */
881 	"*(u64 *)(r10 - 8) = r6;"
882 	"goto l_merge%=;"
883 "l_scalar%=:"
884 	/* Path B: store scalar at SPI 0 */
885 	"r1 = 42;"
886 	"*(u64 *)(r10 - 8) = r1;"
887 "l_merge%=:"
888 	/*
889 	 * Spill PTR_TO_STACK{off=-8} to SPI 1, then reload.
890 	 * Reload kills FP-derived tracking, hiding the
891 	 * subsequent SPI 0 access from the static analysis.
892 	 */
893 	"r1 = r10;"
894 	"r1 += -8;"
895 	"*(u64 *)(r10 - 16) = r1;"
896 	"goto +0;"			/* checkpoint */
897 	"goto +0;"			/* checkpoint */
898 	"goto +0;"			/* checkpoint */
899 	"r1 = *(u64 *)(r10 - 16);"
900 	/* Read SPI 0 through reloaded pointer */
901 	"r0 = *(u64 *)(r1 + 0);"
902 	/* Dereference: safe for map value (path A),
903 	 * unsafe for scalar (path B).
904 	 */
905 	"r0 = *(u64 *)(r0 + 0);"
906 	"exit;"
907 "l_exit%=:"
908 	"r0 = 0;"
909 	"exit;"
910 	:
911 	: __imm(bpf_map_lookup_elem),
912 	  __imm(bpf_get_prandom_u32),
913 	  __imm_addr(map)
914 	: __clobber_all);
915 }
916 
917 /* === Tests for 4-byte stack slot liveness granularity === */
918 
919 /* Test that a 4-byte aligned write is stack_def and kills liveness.
920  *
921  *   0: *(u64 *)(r10 - 8) = 0      def slots 0,1 (full SPI 0)
922  *   1: *(u32 *)(r10 - 8) = 0      def slot 1 (4-byte write kills slot 1)
923  *   2: r0 = *(u64 *)(r10 - 8)     use slots 0,1
924  *   3: r0 = 0
925  *   4: exit
926  *
927  * At insn 1, the 4-byte write defines slot 1. Slot 0 still flows
928  * backward from insn 2's read: live_stack_before = 0x1.
929  */
930 SEC("socket")
931 __log_level(2)
932 __msg("1: (62) *(u32 *)(r10 -8) = 0         ; def: fp0-8h")
933 __naked void four_byte_write_kills_slot(void)
934 {
935 	asm volatile (
936 	"*(u64 *)(r10 - 8) = 0;"
937 	"*(u32 *)(r10 - 8) = 0;"
938 	"r0 = *(u64 *)(r10 - 8);"
939 	"r0 = 0;"
940 	"exit;"
941 	::: __clobber_all);
942 }
943 
944 /* Test that a write to the upper half of an SPI is dead when only
945  * the lower half is read. This was impossible at SPI granularity
946  * where any read of the SPI kept the entire SPI live.
947  *
948  *   0: *(u32 *)(r10 - 8) = 0      def slot 1 (DEAD: never read)
949  *   1: *(u32 *)(r10 - 4) = 0      def slot 0
950  *   2: r0 = *(u32 *)(r10 - 4)     use slot 0 only
951  *   3: r0 = 0
952  *   4: exit
953  *
954  * At insn 0, nothing is live (0x0). Previously at SPI granularity,
955  * the read at insn 2 would mark the full SPI 0 as live and the
956  * 4-byte writes wouldn't count as def, so insn 0 would have had
957  * SPI 0 live (0x1).
958  */
959 SEC("socket")
960 __log_level(2)
961 __msg("0: (62) *(u32 *)(r10 -8) = 0         ; def: fp0-8h")
962 __msg("2: (61) r0 = *(u32 *)(r10 -4)        ; use: fp0-4h")
963 __naked void dead_half_spi_write(void)
964 {
965 	asm volatile (
966 	"*(u32 *)(r10 - 8) = 0;"
967 	"*(u32 *)(r10 - 4) = 0;"
968 	"r0 = *(u32 *)(r10 - 4);"
969 	"r0 = 0;"
970 	"exit;"
971 	::: __clobber_all);
972 }
973 
974 /* Test that a 4-byte read from the upper half of SPI 0 makes only
975  * slot 1 live (0x2), not the full SPI (0x3).
976  *
977  *   0: *(u64 *)(r10 - 8) = 0      def slots 0,1
978  *   1: r0 = *(u32 *)(r10 - 8)     use slot 1 only (upper half)
979  *   2: r0 = 0
980  *   3: exit
981  *
982  * At insn 1, live_stack_before = 0x2 (slot 1 only).
983  */
984 SEC("socket")
985 __log_level(2)
986 __msg("1: (61) r0 = *(u32 *)(r10 -8)        ; use: fp0-8h")
987 __naked void four_byte_read_upper_half(void)
988 {
989 	asm volatile (
990 	"*(u64 *)(r10 - 8) = 0;"
991 	"r0 = *(u32 *)(r10 - 8);"
992 	"r0 = 0;"
993 	"exit;"
994 	::: __clobber_all);
995 }
996 
997 /* Test that a 2-byte write does NOT count as stack_def.
998  * Sub-4-byte writes don't fully cover a 4-byte slot,
999  * so liveness passes through.
1000  *
1001  *   0: *(u64 *)(r10 - 8) = 0      def slots 0,1
1002  *   1: *(u16 *)(r10 - 4) = 0      NOT stack_def (2 < 4 bytes)
1003  *   2: r0 = *(u32 *)(r10 - 4)     use slot 0
1004  *   3: r0 = 0
1005  *   4: exit
1006  *
1007  * At insn 1, slot 0 still live (0x1) because 2-byte write
1008  * didn't kill it.
1009  */
1010 SEC("socket")
1011 __log_level(2)
1012 __msg("0: (7a) *(u64 *)(r10 -8) = 0         ; def: fp0-8")
1013 __msg("1: (6a) *(u16 *)(r10 -4) = 0{{$}}")
1014 __msg("2: (61) r0 = *(u32 *)(r10 -4)        ; use: fp0-4h")
1015 __naked void two_byte_write_no_kill(void)
1016 {
1017 	asm volatile (
1018 	"*(u64 *)(r10 - 8) = 0;"
1019 	"*(u16 *)(r10 - 4) = 0;"
1020 	"r0 = *(u32 *)(r10 - 4);"
1021 	"r0 = 0;"
1022 	"exit;"
1023 	::: __clobber_all);
1024 }
1025 
1026 /* Test that a 1-byte write does NOT count as stack_def.
1027  *
1028  *   0: *(u64 *)(r10 - 8) = 0      def slots 0,1
1029  *   1: *(u8 *)(r10 - 4) = 0       NOT stack_def (1 < 4 bytes)
1030  *   2: r0 = *(u32 *)(r10 - 4)     use slot 0
1031  *   3: r0 = 0
1032  *   4: exit
1033  *
1034  * At insn 1, slot 0 still live (0x1).
1035  */
SEC("socket")
__log_level(2)
__msg("0: (7a) *(u64 *)(r10 -8) = 0         ; def: fp0-8")
__msg("1: (72) *(u8 *)(r10 -4) = 0")
__msg("2: (61) r0 = *(u32 *)(r10 -4)        ; use: fp0-4h")
__naked void one_byte_write_no_kill(void)
{
	asm volatile (
	"*(u64 *)(r10 - 8) = 0;"	/* full write: logged as "def: fp0-8" */
	"*(u8 *)(r10 - 4) = 0;"		/* 1-byte write: must NOT be logged as a def */
	"r0 = *(u32 *)(r10 - 4);"	/* slot still live -> "use: fp0-4h" */
	"r0 = 0;"
	"exit;"
	::: __clobber_all);
}
1051 
1052 /* Test stack access beyond fp-256 exercising the second bitmask word.
1053  * fp-264 is SPI 32, slots 64-65, which are bits 0-1 of live_stack[1].
1054  *
1055  *   0: *(u64 *)(r10 - 264) = 0     def slots 64,65
1056  *   1: r0 = *(u64 *)(r10 - 264)    use slots 64,65
1057  *   2: r0 = 0
1058  *   3: exit
1059  *
1060  * At insn 1, live_stack high word has bits 0,1 set: 0x3:0x0.
1061  */
SEC("socket")
__log_level(2)
__msg("1: (79) r0 = *(u64 *)(r10 -264)      ; use: fp0-264")
__naked void high_stack_second_bitmask_word(void)
{
	asm volatile (
	"*(u64 *)(r10 - 264) = 0;"	/* fp-264 lies past fp-256, i.e. in the second bitmask word */
	"r0 = *(u64 *)(r10 - 264);"	/* read must still be logged as "use: fp0-264" */
	"r0 = 0;"
	"exit;"
	::: __clobber_all);
}
1074 
1075 /* Test that two separate 4-byte writes to each half of an SPI
1076  * together kill liveness for the full SPI.
1077  *
1078  *   0: *(u32 *)(r10 - 8) = 0      def slot 1 (upper half)
1079  *   1: *(u32 *)(r10 - 4) = 0      def slot 0 (lower half)
1080  *   2: r0 = *(u64 *)(r10 - 8)     use slots 0,1
1081  *   3: r0 = 0
1082  *   4: exit
1083  *
1084  * At insn 0: live_stack_before = 0x0 (both slots killed by insns 0,1).
1085  * At insn 1: live_stack_before = 0x2 (slot 1 still live, slot 0 killed here).
1086  */
SEC("socket")
__log_level(2)
__msg("0: (62) *(u32 *)(r10 -8) = 0         ; def: fp0-8h")
__msg("1: (62) *(u32 *)(r10 -4) = 0         ; def: fp0-4h")
__naked void two_four_byte_writes_kill_full_spi(void)
{
	asm volatile (
	"*(u32 *)(r10 - 8) = 0;"	/* defines one half of the SPI */
	"*(u32 *)(r10 - 4) = 0;"	/* defines the other half; together they cover fp-8..fp-1 */
	"r0 = *(u64 *)(r10 - 8);"	/* 8-byte read fully satisfied by the two half-defs */
	"r0 = 0;"
	"exit;"
	::: __clobber_all);
}
1101 
1102 /* Test that 4-byte writes on both branches kill a slot at the
1103  * join point. Previously at SPI granularity, a 4-byte write was
1104  * not stack_def, so liveness would flow backward through the
1105  * branch that only had a 4-byte write.
1106  *
1107  *   0: call bpf_get_prandom_u32
1108  *   1: if r0 != 0 goto 1f
1109  *   2: *(u64 *)(r10 - 8) = 0       path A: def slots 0,1
1110  *   3: goto 2f
1111  * 1:4: *(u32 *)(r10 - 4) = 0       path B: def slot 0
1112  * 2:5: r0 = *(u32 *)(r10 - 4)      use slot 0
1113  *   6: r0 = 0
1114  *   7: exit
1115  *
1116  * Both paths define slot 0 before the read. At insn 1 (branch),
1117  * live_stack_before = 0x0 because slot 0 is killed on both paths.
1118  */
SEC("socket")
__log_level(2)
__msg("1: (55) if r0 != 0x0 goto pc+2")
__msg("2: (7a) *(u64 *)(r10 -8) = 0         ; def: fp0-8")
__msg("3: (05) goto pc+1")
__msg("4: (62) *(u32 *)(r10 -4) = 0         ; def: fp0-4h")
__msg("5: (61) r0 = *(u32 *)(r10 -4)        ; use: fp0-4h")
__naked void both_branches_kill_slot(void)
{
	asm volatile (
	"call %[bpf_get_prandom_u32];"
	"if r0 != 0 goto 1f;"
	"*(u64 *)(r10 - 8) = 0;"	/* path A: 8-byte def covers the slot */
	"goto 2f;"
"1:"
	"*(u32 *)(r10 - 4) = 0;"	/* path B: 4-byte def covers the same slot */
"2:"
	"r0 = *(u32 *)(r10 - 4);"	/* use is killed on both paths -> dead before the branch */
	"r0 = 0;"
	"exit;"
	:: __imm(bpf_get_prandom_u32)
	: __clobber_all);
}
1142 
1143 /* Soundness: cleaning the dead upper half of an SPI must not
1144  * affect the live lower half's type information for pruning.
1145  *
1146  * Both halves of SPI 0 are written separately. Only the lower
1147  * half (slot 0) is used as a 4-byte map key. The upper half
1148  * (slot 1) is dead and cleaned to STACK_INVALID.
1149  *
1150  * Path A: key stays 0 (STACK_ZERO) → non-null array lookup
1151  * Path B: key byte turns STACK_MISC → may-null array lookup
1152  * Deref without null check: safe for A, unsafe for B.
1153  *
1154  * If half-SPI cleaning incorrectly corrupted the live half's
1155  * type info, path A's cached state could generalize and unsoundly
1156  * prune path B.
1157  *
1158  * Expected: reject (path B unsafe).
1159  */
SEC("socket")
__flag(BPF_F_TEST_STATE_FREQ)
__failure __msg("R0 invalid mem access 'map_value_or_null'")
__naked void half_spi_clean_preserves_stack_zero(void)
{
	asm volatile (
	"*(u32 *)(r10 - 4) = 0;"           /* slot 0: STACK_ZERO (the live map key) */
	"*(u32 *)(r10 - 8) = 0;"           /* slot 1: STACK_ZERO (dead, gets cleaned) */
	"call %[bpf_get_prandom_u32];"
	"if r0 != 0 goto l_nonconst%=;"
	"goto l_lookup%=;"
"l_nonconst%=:"
	"*(u8 *)(r10 - 4) = r0;"           /* slot 0: STACK_MISC -> key no longer const 0 */
"l_lookup%=:"
	"r2 = r10;"
	"r2 += -4;"                        /* r2 = &key (fp-4, 4-byte key) */
	"r1 = %[array_map_8b] ll;"
	"call %[bpf_map_lookup_elem];"
	"r0 = *(u64 *)(r0 + 0);"           /* unsafe if null (path B must be explored) */
	"exit;"
	:
	: __imm(bpf_get_prandom_u32),
	  __imm(bpf_map_lookup_elem),
	  __imm_addr(array_map_8b)
	: __clobber_all);
}
1186 
1187 /*
1188  * Model of scx_lavd's pick_idle_cpu_at_cpdom iat block:
1189  * conditional block with helper call and temporary stack spill,
1190  * spill dead after merge.
1191  *
1192  * Path A (fall-through): spill r6 to fp-8 across helper call
1193  * Path B (branch taken): skip the block entirely
1194  * At merge (insn 6): fp-8 is dead (never read after merge)
1195  *
1196  * Static liveness marks fp-8 dead at merge. clean_verifier_state()
1197  * converts path A's STACK_SPILL to STACK_INVALID. Path B has
1198  * STACK_INVALID. stacksafe() matches -> path B pruned -> "6: safe".
1199  */
SEC("socket")
__flag(BPF_F_TEST_STATE_FREQ)
__success
__log_level(2)
__msg("6: safe")
__naked void dead_spill_at_merge_enables_pruning(void)
{
	asm volatile (
	"call %[bpf_get_prandom_u32];"
	"r6 = 7;"
	"if r0 != 0 goto l_skip%=;"
	/* conditional block: spill r6, call (clobbers regs), reload r6 */
	"*(u64 *)(r10 - 8) = r6;"
	"call %[bpf_get_prandom_u32];"
	"r6 = *(u64 *)(r10 - 8);"
"l_skip%=:"
	/* fp-8 dead (never read again). Path B pruned here -> "6: safe" */
	"r0 = r6;"
	"exit;"
	:
	: __imm(bpf_get_prandom_u32)
	: __clobber_all);
}
1223 
1224 /*
1225  * FP-offset tracking loses precision on second ADD, killing all liveness.
1226  *
1227  * fp_off_insn_xfer() handles "FP itself + negative imm" precisely
1228  * (e.g. r6 = r10; r6 += -24 -> slot 5).  But any subsequent ADD/SUB
1229  * on a register that already has non-zero spis falls through to
1230  * spis_set_all(), because the code only handles the FP-itself case.
1231  *
1232  * A write through this imprecise register enters the non-zero-spis
1233  * branch of set_indirect_stack_access(), which OR's the all-ones
1234  * mask into stack_def.  The backward liveness equation
1235  *
1236  *   stack_in = (stack_out & ~stack_def) | stack_use
1237  *
1238  * sees ~ALL = 0, killing ALL slot liveness at that instruction.
1239  *
1240  * At the merge pruning point, live_stack_before is empty.
1241  * clean_verifier_state() marks fp-8 as STACK_INVALID.
1242  * stacksafe() skips STACK_INVALID (line "continue"), so pruning
1243  * succeeds regardless of the current state's fp-8 value.
1244  * Path B is pruned, its null deref is never explored.
1245  *
1246  * Correct behavior: reject (path B dereferences NULL).
1247  * Bug behavior: accept (path B pruned away).
1248  */
SEC("socket")
__flag(BPF_F_TEST_STATE_FREQ)
__failure __msg("R1 invalid mem access 'scalar'")
__naked void fp_add_loses_precision_kills_liveness(void)
{
	asm volatile (
	"call %[bpf_get_prandom_u32];"
	"if r0 != 0 goto l_pathB%=;"

	/* Path A (fall-through, explored first): fp-8 = 0 */
	"r1 = 0;"
	"*(u64 *)(r10 - 8) = r1;"
	"goto l_merge%=;"

"l_pathB%=:"
	/* Path B (explored second): fp-8 = 42 */
	"r1 = 42;"
	"*(u64 *)(r10 - 8) = r1;"

"l_merge%=:"
	/*
	 * Create imprecise FP-derived register.
	 * r6 = r10 - 24 gets precise slot 5.
	 * r6 += 8 hits the else branch (spis non-zero, delta > 0)
	 * and sets spis to ALL.  r6 is actually r10-16.
	 */
	"r6 = r10;"
	"r6 += -24;"
	"r6 += 8;"

	/*
	 * Write through imprecise r6.  Actually writes to fp-16
	 * (does NOT touch fp-8), but liveness marks ALL slots
	 * as stack_def, killing fp-8's liveness.
	 */
	"r7 = 0;"
	"*(u64 *)(r6 + 0) = r7;"

	/* Read fp-8: liveness says dead, but value is needed. */
	"r2 = *(u64 *)(r10 - 8);"
	"if r2 == 42 goto l_danger%=;"

	/* r2 != 42 (path A: r2 == 0): safe exit */
	"r0 = 0;"
	"exit;"

"l_danger%=:"
	/* Only reachable from path B (r2 == 42): null deref.
	 * The verifier must explore this path and reject it;
	 * unsound pruning of path B would accept the program.
	 */
	"r1 = 0;"
	"r0 = *(u64 *)(r1 + 0);"
	"exit;"
	:
	: __imm(bpf_get_prandom_u32)
	: __clobber_all);
}
1304 
SEC("socket")
__flag(BPF_F_TEST_STATE_FREQ)
/*
 * Same shape as fp_add_loses_precision_kills_liveness above, except that
 * precision on the FP-derived pointer is lost by spilling it to fp-160
 * and filling it back rather than by a second ADD.  The imprecise
 * indirect write through r6 must not kill fp-8's liveness, otherwise
 * path B's null deref would be pruned away.
 */
__failure __msg("R1 invalid mem access 'scalar'")
__naked void fp_spill_loses_precision_kills_liveness(void)
{
	asm volatile (
	"call %[bpf_get_prandom_u32];"
	"if r0 != 0 goto l_pathB%=;"

	/* path A (explored first): fp-8 = 0 */
	"r1 = 0;"
	"*(u64 *)(r10 - 8) = r1;"
	"goto l_merge%=;"

"l_pathB%=:"
	/* path B (explored second): fp-8 = 42 */
	"r1 = 42;"
	"*(u64 *)(r10 - 8) = r1;"

"l_merge%=:"
	/* r6 = fp-64, then round-trip it through the stack at fp-160 */
	"r6 = r10;"
	"r6 += -64;"
	"*(u64 *)(r10 - 160) = r6;"
	"r6 = *(u64 *)(r10 - 160);"

	/* indirect write through the filled pointer (actually hits fp-64) */
	"r7 = 0;"
	"*(u64 *)(r6 + 0) = r7;"

	/* read fp-8: must still be considered live here */
	"r2 = *(u64 *)(r10 - 8);"
	"if r2 == 42 goto l_danger%=;"

	"r0 = *(u64 *)(r10 - 56);"
	"exit;"

"l_danger%=:"
	/* reachable only from path B (r2 == 42): null dereference */
	"r1 = 0;"
	"r0 = *(u64 *)(r1 + 0);"
	"exit;"
	:
	: __imm(bpf_get_prandom_u32)
	: __clobber_all);
}
1345 
1346 /* === Tests for frame-based AT_FP tracking === */
1347 
1348 /*
1349  * Test 1: conditional_stx_in_subprog
1350  * Subprog conditionally writes caller's slot.
1351  * Verify slot stays live (backward pass handles conditional def via CFG).
1352  *
1353  * Main writes fp-8=42, calls cond_writer(fp-8), reads fp-8.
1354  * cond_writer only writes on one path → parent_def only on that path.
1355  * The backward parent_live correctly keeps fp-8 live at entry
1356  * (conditional write doesn't kill liveness at the join).
1357  */
SEC("socket")
__log_level(2)
/* fp-8 live at call (callee conditionally writes → slot not killed) */
__msg("1: (7b) *(u64 *)(r10 -8) = r1        ; def: fp0-8")
__msg("4: (85) call pc+2{{$}}")
__msg("5: (79) r0 = *(u64 *)(r10 -8)        ; use: fp0-8")
__naked void conditional_stx_in_subprog(void)
{
	asm volatile (
	"r1 = 42;"
	"*(u64 *)(r10 - 8) = r1;"	/* def: fp0-8 */
	"r1 = r10;"
	"r1 += -8;"			/* r1 = &fp-8, passed to the callee */
	"call cond_writer;"		/* conditional write in callee keeps fp0-8 live */
	"r0 = *(u64 *)(r10 - 8);"	/* use: fp0-8 */
	"exit;"
	::: __clobber_all);
}
1376 
1377 /* Conditionally writes to *(r1+0) */
static __used __naked void cond_writer(void)
{
	asm volatile (
	"r6 = r1;"			/* r1 = pointer to a caller stack slot */
	"call %[bpf_get_prandom_u32];"
	"if r0 == 0 goto 1f;"
	"*(u64 *)(r6 + 0) = r0;"	/* write happens only on this path */
	"1:"
	"r0 = 0;"
	"exit;"
	:: __imm(bpf_get_prandom_u32)
	: __clobber_all);
}
1391 
/*
 * Same callee (read_first_param) invoked from two call sites with
 * pointers to different caller slots.  Each call must be logged with
 * only the offset actually reachable through its own argument:
 * fp0-16 at the first call, fp0-32 at the second.
 */
SEC("socket")
__log_level(2)
__msg("4: (85) call pc+{{.*}}                   ; use: fp0-16")
__msg("7: (85) call pc+{{.*}}                   ; use: fp0-32")
__naked void multiple_callsites_different_offsets(void)
{
	asm volatile (
	"*(u64 *)(r10 - 16) = 0;"
	"*(u64 *)(r10 - 32) = 0;"
	"r1 = r10;"
	"r1 += -16;"
	"call read_first_param;"	/* logged as "use: fp0-16" */
	"r1 = r10;"
	"r1 += -32;"
	"call read_first_param;"	/* logged as "use: fp0-32" */
	"r0 = 0;"
	"exit;"
	::: __clobber_all);
}
1411 
1412 /*
1413  * Test 3: nested_fp_passthrough
1414  * main→A→B, main's FP forwarded to B. B accesses main's stack.
1415  * Verify liveness propagates through.
1416  *
1417  * Main passes fp-32 to outer_forwarder, which passes it to inner_reader.
1418  * inner_reader reads at arg+0 (= main's fp-32).
1419  * parent_live propagates transitively: inner→outer→main.
1420  */
SEC("socket")
__log_level(2)
/* At call to outer_forwarder: main's fp-32 (slots 6,7) should be live */
__msg("6: (85) call pc+{{.*}}                   ; use: fp0-32")
__naked void nested_fp_passthrough(void)
{
	asm volatile (
	"*(u64 *)(r10 - 8) = 0;"
	"*(u64 *)(r10 - 16) = 0;"
	"*(u64 *)(r10 - 24) = 0;"
	"*(u64 *)(r10 - 32) = 0;"
	"r1 = r10;"
	"r1 += -32;"			/* only &fp-32 escapes to the callees */
	"call outer_forwarder;"		/* only fp0-32 reported used, not fp-8/-16/-24 */
	"r0 = 0;"
	"exit;"
	::: __clobber_all);
}
1439 
1440 /* Forwards arg to inner_reader */
static __used __naked void outer_forwarder(void)
{
	asm volatile (
	"call inner_reader;"	/* r1 (caller stack ptr) passed through untouched */
	"r0 = 0;"
	"exit;"
	::: __clobber_all);
}
1449 
/* Reads *(r1+0): the top-level caller's slot, forwarded two frames down */
static __used __naked void inner_reader(void)
{
	asm volatile (
	"r0 = *(u64 *)(r1 + 0);"
	"r0 = 0;"
	"exit;"
	::: __clobber_all);
}
1458 
1459 /*
1460  * Test 4: callee_must_write_before_read
1461  * Callee unconditionally writes parent slot before reading.
1462  * Verify slot is NOT live at call site (parent_def kills it).
1463  */
SEC("socket")
__log_level(2)
/* fp-8 NOT live at call: callee writes before reading (parent_def kills it) */
__msg("2: .12345.... (85) call pc+")
__naked void callee_must_write_before_read(void)
{
	asm volatile (
	"r1 = r10;"
	"r1 += -8;"			/* pass &fp-8 without initializing it */
	"call write_then_read;"		/* callee's unconditional write kills liveness */
	"r0 = 0;"
	"exit;"
	::: __clobber_all);
}
1478 
1479 /* Unconditionally writes *(r1+0), then reads it back */
static __used __naked void write_then_read(void)
{
	asm volatile (
	"r6 = r1;"			/* r1 = pointer to caller slot */
	"r7 = 99;"
	"*(u64 *)(r6 + 0) = r7;"	/* unconditional write first... */
	"r0 = *(u64 *)(r6 + 0);"	/* ...then read it back */
	"r0 = 0;"
	"exit;"
	::: __clobber_all);
}
1491 
1492 /*
1493  * Test 5: return_site_liveness_bleeding
1494  * Main calls subprog twice. Slot used after one call but not the other.
1495  * Context-insensitive: slot conservatively live at both.
1496  *
1497  * After first call: read fp-8.
1498  * After second call: don't read fp-8.
1499  * Since parent_live is per-subprog (not per call-site),
1500  * fp-8 is live at both call sites.
1501  */
SEC("socket")
__log_level(2)
/* Both calls have fp-8 live due to context-insensitive parent_live */
__msg("3: (85) call pc+{{.*}}                   ; use: fp0-8")
__msg("7: (85) call pc+{{.*}}                   ; use: fp0-8")
__naked void return_site_liveness_bleeding(void)
{
	asm volatile (
	"*(u64 *)(r10 - 8) = 0;"
	"r1 = r10;"
	"r1 += -8;"
	"call read_first_param;"
	"r0 = *(u64 *)(r10 - 8);"	/* read only after the FIRST call */
	"r1 = r10;"
	"r1 += -8;"
	"call read_first_param;"	/* no read after this call, yet fp0-8 is still live */
	"r0 = 0;"
	"exit;"
	::: __clobber_all);
}
1522 
/*
 * bpf_loop callback receives ctx = &fp-8 and conditionally reads
 * *(ctx - 8) = caller's fp-16, i.e. a slot beyond the ctx pointer
 * itself.  The bpf_loop call must still be logged with fp0-16 live.
 */
SEC("socket")
__log_level(2)
__msg("9: (85) call bpf_loop#181            ; use: fp0-16")
__naked void callback_conditional_read_beyond_ctx(void)
{
	asm volatile (
	"r1 = 42;"
	"*(u64 *)(r10 - 8) = r1;"
	"*(u64 *)(r10 - 16) = r1;"
	"r1 = 2;"			/* nr_loops */
	"r2 = cb_cond_read ll;"		/* callback */
	"r3 = r10;"
	"r3 += -8;"			/* ctx = &fp-8 */
	"r4 = 0;"			/* flags */
	"call %[bpf_loop];"
	"r0 = 0;"
	"exit;"
	:: __imm(bpf_loop)
	: __clobber_all);
}
1543 
1544 /* Callback conditionally reads *(ctx - 8) = caller fp-16 */
static __used __naked void cb_cond_read(void)
{
	asm volatile (
	"r6 = r2;"			/* r2 = ctx pointer from bpf_loop */
	"call %[bpf_get_prandom_u32];"
	"if r0 == 0 goto 1f;"
	"r0 = *(u64 *)(r6 - 8);"	/* conditional read of caller fp-16 */
	"1:"
	"r0 = 0;"
	"exit;"
	:: __imm(bpf_get_prandom_u32)
	: __clobber_all);
}
1558 
/*
 * bpf_loop callback unconditionally writes *(ctx - 8) = caller's fp-16
 * before reading it back.  The write kills liveness, so within the
 * callback the def/use pair is logged against fp0-16.
 */
SEC("socket")
__log_level(2)
__msg("14: (7b) *(u64 *)(r6 -8) = r7         ; def: fp0-16")
__msg("15: (79) r0 = *(u64 *)(r6 -8)         ; use: fp0-16")
__naked void callback_write_before_read_kills(void)
{
	asm volatile (
	"r1 = 42;"
	"*(u64 *)(r10 - 8) = r1;"
	"*(u64 *)(r10 - 16) = r1;"
	"r1 = 2;"			/* nr_loops */
	"r2 = cb_write_read ll;"	/* callback */
	"r3 = r10;"
	"r3 += -8;"			/* ctx = &fp-8 */
	"r4 = 0;"			/* flags */
	"call %[bpf_loop];"
	"r0 = 0;"
	"exit;"
	:: __imm(bpf_loop)
	: __clobber_all);
}
1580 
1581 /* Callback unconditionally writes *(ctx-8), then reads it back.
1582  * The write (parent_def) kills liveness before entry.
1583  */
static __used __naked void cb_write_read(void)
{
	asm volatile (
	"r6 = r2;"			/* r2 = ctx pointer from bpf_loop */
	"r7 = 99;"
	"*(u64 *)(r6 - 8) = r7;"	/* unconditional write: def fp0-16 */
	"r0 = *(u64 *)(r6 - 8);"	/* read back: use fp0-16 */
	"r0 = 0;"
	"exit;"
	::: __clobber_all);
}
1595 
1596 /*
1597  * bpf_loop callback conditionally writes fp-16 then unconditionally
1598  * reads it. The conditional write does NOT kill liveness
1599  */
SEC("socket")
__log_level(2)
__msg("9: (85) call bpf_loop#181            ; use: fp0-16")
__naked void callback_conditional_write_preserves(void)
{
	asm volatile (
	"r1 = 42;"
	"*(u64 *)(r10 - 8) = r1;"
	"*(u64 *)(r10 - 16) = r1;"
	"r1 = 2;"			/* nr_loops */
	"r2 = cb_cond_write_read ll;"	/* callback */
	"r3 = r10;"
	"r3 += -8;"			/* ctx = &fp-8; callback touches ctx-8 = fp-16 */
	"r4 = 0;"			/* flags */
	"call %[bpf_loop];"		/* fp0-16 stays live: write in callback is conditional */
	"r0 = 0;"
	"exit;"
	:: __imm(bpf_loop)
	: __clobber_all);
}
1620 
/* Conditionally writes *(ctx-8), then unconditionally reads it back.
 * Because the write is conditional it does not kill liveness at entry.
 */
static __used __naked void cb_cond_write_read(void)
{
	asm volatile (
	"r6 = r2;"			/* r2 = ctx pointer from bpf_loop */
	"call %[bpf_get_prandom_u32];"
	"if r0 == 0 goto 1f;"
	"*(u64 *)(r6 - 8) = r0;"	/* write on one path only */
	"1:"
	"r0 = *(u64 *)(r6 - 8);"	/* unconditional read */
	"r0 = 0;"
	"exit;"
	:: __imm(bpf_get_prandom_u32)
	: __clobber_all);
}
1635 
1636 /*
1637  * Two bpf_loop calls with the same callback but different ctx pointers.
1638  *
1639  * First call: ctx=fp-8, second call: ctx=fp-24.
1640  */
SEC("socket")
__log_level(2)
__msg(" 8: (85) call bpf_loop{{.*}}            ; use: fp0-8")
__msg("15: (85) call bpf_loop{{.*}}            ; use: fp0-24")
__naked void callback_two_calls_different_ctx(void)
{
	asm volatile (
	"*(u64 *)(r10 - 8) = 0;"
	"*(u64 *)(r10 - 24) = 0;"
	"r1 = 1;"			/* nr_loops */
	"r2 = cb_read_ctx ll;"		/* same callback both times */
	"r3 = r10;"
	"r3 += -8;"			/* first call: ctx = &fp-8 */
	"r4 = 0;"
	"call %[bpf_loop];"		/* logged "use: fp0-8" */
	"r1 = 1;"
	"r2 = cb_read_ctx ll;"
	"r3 = r10;"
	"r3 += -24;"			/* second call: ctx = &fp-24 */
	"r4 = 0;"
	"call %[bpf_loop];"		/* logged "use: fp0-24" */
	"r0 = 0;"
	"exit;"
	:: __imm(bpf_loop)
	: __clobber_all);
}
1667 
1668 /* Callback reads at ctx+0 unconditionally */
/* bpf_loop callback: unconditionally reads *(ctx+0) via r2 */
static __used __naked void cb_read_ctx(void)
{
	asm volatile (
	"r0 = *(u64 *)(r2 + 0);"
	"r0 = 0;"
	"exit;"
	::: __clobber_all);
}
1677 
1678 /*
1679  * Reproducer for unsound pruning in refined_caller_live_stack().
1680  *
1681  * Three-level call chain: main → mid_fwd → grandchild_deref.
1682  * Main passes &fp-8 to mid_fwd, which forwards R1 to grandchild_deref.
1683  * grandchild_deref reads main's fp-8 through the forwarded pointer
1684  * and dereferences the result.
1685  *
1686  * refined_caller_live_stack() has a callee_offset++ when mid_fwd
1687  * (frame 1) is mid-call. This drops the transitive parent_live
1688  * contribution at mid_fwd's call instruction — the only place
1689  * where grandchild_deref's read of main's fp-8 is recorded.
1690  * As a result, main's fp-8 is cleaned to STACK_INVALID at the
1691  * pruning point inside grandchild_deref, and path B is
1692  * incorrectly pruned against path A.
1693  *
1694  * Path A: main stores PTR_TO_MAP_VALUE at fp-8
1695  * Path B: main stores scalar 42 at fp-8
1696  *
1697  * Correct behavior: reject (path B dereferences scalar)
1698  * Bug behavior: accept (path B pruned against cleaned path A)
1699  */
SEC("socket")
__flag(BPF_F_TEST_STATE_FREQ)
__failure __msg("R0 invalid mem access 'scalar'")
__naked void transitive_parent_stack_read_unsound(void)
{
	asm volatile (
	/* Map lookup to get PTR_TO_MAP_VALUE */
	"r1 = %[map] ll;"
	"*(u32 *)(r10 - 32) = 0;"	/* key = 0 at fp-32 */
	"r2 = r10;"
	"r2 += -32;"
	"call %[bpf_map_lookup_elem];"
	"if r0 == 0 goto l_exit%=;"
	"r6 = r0;"			/* r6 = PTR_TO_MAP_VALUE (non-null) */
	/* Branch: path A (fall-through) explored first */
	"call %[bpf_get_prandom_u32];"
	"if r0 != 0 goto l_scalar%=;"
	/* Path A: fp-8 = PTR_TO_MAP_VALUE */
	"*(u64 *)(r10 - 8) = r6;"
	"goto l_merge%=;"
"l_scalar%=:"
	/* Path B: fp-8 = scalar 42 */
	"r1 = 42;"
	"*(u64 *)(r10 - 8) = r1;"
"l_merge%=:"
	/* Pass &fp-8 to mid_fwd → grandchild_deref */
	"r1 = r10;"
	"r1 += -8;"
	"call mid_fwd;"
	"r0 = 0;"
	"exit;"
"l_exit%=:"
	/* lookup miss: nothing to test */
	"r0 = 0;"
	"exit;"
	:
	: __imm(bpf_map_lookup_elem),
	  __imm(bpf_get_prandom_u32),
	  __imm_addr(map)
	: __clobber_all);
}
1740 
1741 /* Forwards R1 (ptr to main's fp-8) to grandchild_deref */
static __used __naked void mid_fwd(void)
{
	asm volatile (
	"call grandchild_deref;"	/* r1 (ptr to main's fp-8) forwarded untouched */
	"r0 = 0;"
	"exit;"
	::: __clobber_all);
}
1750 
1751 /* Reads main's fp-8 through forwarded pointer, dereferences result */
static __used __naked void grandchild_deref(void)
{
	asm volatile (
	"goto +0;"				/* checkpoint (no-op jump creating a pruning point) */
	"goto +0;"				/* checkpoint */
	/* read main's fp-8: map_ptr (path A) or scalar (path B) */
	"r0 = *(u64 *)(r1 + 0);"
	/* dereference: safe for map_ptr, unsafe for scalar */
	"r0 = *(u64 *)(r0 + 0);"
	"r0 = 0;"
	"exit;"
	::: __clobber_all);
}
1765 
/*
 * Main passes &fp-8 to mid_two_fp_threshold, which spills both its own
 * fp-derived pointer and the caller's pointer.  The expected log lines
 * check cross-frame FP tracking annotations (fp0-*/fp1-*) for the
 * loads at insns 14/15 and the subprog use/def summary.
 */
SEC("socket")
__log_level(2)
__success
__msg("14: (79) r1 = *(u64 *)(r10 -8) // r6=fp0-8 r7=fp1-16 fp-8=fp1-16 fp-16=fp0-8")
__msg("15: (79) r0 = *(u64 *)(r1 +0) // r1=fp1-16 r6=fp0-8 r7=fp1-16 fp-8=fp1-16 fp-16=fp0-8")
__msg("stack use/def subprog#1 mid_two_fp_threshold (d1,cs2):")
__msg("14: (79) r1 = *(u64 *)(r10 -8)        ; use: fp1-8")
__msg("15: (79) r0 = *(u64 *)(r1 +0)         ; use: fp1-16")
__naked void two_fp_clear_stack_threshold(void)
{
	asm volatile (
	"r1 = r10;"
	"r1 += -8;"			/* r1 = &fp-8 (becomes fp0-8 in the callee) */
	"call mid_two_fp_threshold;"
	"r0 = 0;"
	"exit;"
	::: __clobber_all);
}
1784 
static __used __naked void mid_two_fp_threshold(void)
{
	asm volatile (
	"r6 = r1;"			/* r6 = caller's &fp-8 (fp0-8) */
	"r7 = r10;"
	"r7 += -16;"			/* r7 = own &fp-16 (fp1-16) */
	"*(u64 *)(r10 - 8) = r7;"	/* fp-8  holds fp1-16 */
	"*(u64 *)(r10 - 16) = r6;"	/* fp-16 holds fp0-8 */
	"r1 = r10;"
	"r1 += -8;"
	"r2 = r6;"			/* two FP-derived pointers live in regs at the call */
	"call inner_nop_fptest;"
	"r1 = *(u64 *)(r10 - 8);"	/* reload fp1-16 pointer */
	"r0 = *(u64 *)(r1 + 0);"	/* deref it -> reads own fp-16 */
	"r0 = 0;"
	"exit;"
	::: __clobber_all);
}
1803 
/* Do-nothing callee: exists only so the caller has a call instruction */
static __used __naked void inner_nop_fptest(void)
{
	asm volatile (
	"r0 = 0;"
	"exit;"
	::: __clobber_all);
}
1811 
/*
 * Variant of two_fp_clear_stack_threshold: mid_one_fp_threshold keeps
 * only ONE FP-derived pointer in registers at the call (no "r2 = r6"),
 * so the interesting loads land at insns 13/14 instead of 14/15.
 */
SEC("socket")
__log_level(2)
__success
__msg("13: (79) r1 = *(u64 *)(r10 -8) // r6=fp0-8 r7=fp1-16 fp-8=fp1-16 fp-16=fp0-8")
__msg("14: (79) r0 = *(u64 *)(r1 +0) // r1=fp1-16 r6=fp0-8 r7=fp1-16 fp-8=fp1-16 fp-16=fp0-8")
__msg("stack use/def subprog#1 mid_one_fp_threshold (d1,cs2):")
__msg("13: (79) r1 = *(u64 *)(r10 -8)        ; use: fp1-8")
__msg("14: (79) r0 = *(u64 *)(r1 +0)         ; use: fp1-16")
__naked void one_fp_clear_stack_threshold(void)
{
	asm volatile (
	"r1 = r10;"
	"r1 += -8;"			/* r1 = &fp-8 (fp0-8 in the callee) */
	"call mid_one_fp_threshold;"
	"r0 = 0;"
	"exit;"
	::: __clobber_all);
}
1830 
static __used __naked void mid_one_fp_threshold(void)
{
	asm volatile (
	"r6 = r1;"			/* r6 = caller's &fp-8 (fp0-8) */
	"r7 = r10;"
	"r7 += -16;"			/* r7 = own &fp-16 (fp1-16) */
	"*(u64 *)(r10 - 8) = r7;"	/* fp-8  holds fp1-16 */
	"*(u64 *)(r10 - 16) = r6;"	/* fp-16 holds fp0-8 */
	"r1 = r10;"
	"r1 += -8;"			/* unlike mid_two_*: r2 is NOT set to r6 here */
	"call inner_nop_fptest;"
	"r1 = *(u64 *)(r10 - 8);"	/* reload fp1-16 pointer */
	"r0 = *(u64 *)(r1 + 0);"	/* deref it -> reads own fp-16 */
	"r0 = 0;"
	"exit;"
	::: __clobber_all);
}
1848 
1849 /*
1850  * Reproducer for unsound pruning when a subprog forwards a parent
1851  * stack pointer (AT_PARENT) to a helper with a memory argument.
1852  *
1853  * set_call_stack_access_at() previously only tracked AT_CURRENT args,
1854  * skipping AT_PARENT entirely. This meant helper reads through parent
1855  * stack pointers did not set parent_use, letting the slot appear dead
1856  * at pruning checkpoints inside the subprog.
1857  *
1858  * Program shape:
1859  *   main:
1860  *     *(u32)(fp-4) = 0             key = STACK_ZERO (const 0)
1861  *     call bpf_get_prandom_u32
1862  *     if r0 != 0 goto clobber      path A (fall-through) first
1863  *     goto merge
1864  *   clobber:
1865  *     *(u8)(fp-4) = r0             path B: key[0] = STACK_MISC
1866  *   merge:
1867  *     r1 = fp - 4
1868  *     call fwd_parent_key_to_helper
1869  *     r0 = 0
1870  *     exit
1871  *
1872  *   fwd_parent_key_to_helper(r1 = &caller_fp-4):
1873  *     goto +0                      checkpoint
1874  *     r2 = r1                      R2 = AT_PARENT ptr to caller fp-4
1875  *     r1 = array_map_8b ll         R1 = array map
1876  *     call bpf_map_lookup_elem     reads key_size(4) from parent fp-4
1877  *     r0 = *(u64 *)(r0 + 0)        deref without null check
1878  *     r0 = 0
1879  *     exit
1880  *
1881  * Path A: STACK_ZERO key = const 0 -> array lookup -> PTR_TO_MAP_VALUE
1882  *         (non-NULL for in-bounds const key) -> deref OK.
1883  * Path B: STACK_MISC key = unknown -> array lookup ->
1884  *         PTR_TO_MAP_VALUE_OR_NULL -> deref UNSAFE.
1885  *
1886  * Bug: AT_PARENT R2 arg to bpf_map_lookup_elem skipped -> parent_use
1887  *      not set -> fp-4 cleaned at checkpoint -> STACK_ZERO collapses
1888  *      to STACK_INVALID -> path B pruned -> deref never checked.
1889  *
1890  * Correct verifier behavior: reject (path B deref of map_value_or_null).
1891  */
SEC("socket")
__flag(BPF_F_TEST_STATE_FREQ)
__failure __msg("R0 invalid mem access 'map_value_or_null'")
__naked void helper_parent_stack_read_unsound(void)
{
	asm volatile (
	/* key at fp-4: all bytes STACK_ZERO */
	"*(u32 *)(r10 - 4) = 0;"
	"call %[bpf_get_prandom_u32];"
	/* fall-through (path A) explored first */
	"if r0 != 0 goto l_clobber%=;"
	/* path A: key stays constant zero */
	"goto l_merge%=;"
"l_clobber%=:"
	/* path B: key[0] becomes STACK_MISC, key no longer const */
	"*(u8 *)(r10 - 4) = r0;"
"l_merge%=:"
	"r1 = r10;"
	"r1 += -4;"			/* r1 = &key, passed down as AT_PARENT ptr */
	"call fwd_parent_key_to_helper;"
	"r0 = 0;"
	"exit;"
	:
	: __imm(bpf_get_prandom_u32)
	: __clobber_all);
}
1918 
1919 /*
1920  * Subprog forwards parent stack pointer to bpf_map_lookup_elem as key
1921  * on an array map, then dereferences the result without a null check.
1922  * R1 = &parent_fp-4 (AT_PARENT in this frame).
1923  *
1924  * The helper reads key_size(4) bytes from parent stack.  The deref of
1925  * R0 reads the map value, NOT parent stack, so record_insn_mem_accesses
1926  * does not set parent_use for it.  The ONLY parent stack access is
1927  * through the helper's R2 arg.
1928  */
static __used __naked void fwd_parent_key_to_helper(void)
{
	asm volatile (
	"goto +0;"				/* checkpoint */
	"r2 = r1;"				/* R2 = parent ptr (AT_PARENT) */
	"r1 = %[array_map_8b] ll;"		/* R1 = array map */
	"call %[bpf_map_lookup_elem];"		/* reads 4 bytes from parent fp-4 */
	/* deref without null check: safe for PTR_TO_MAP_VALUE,
	 * unsafe for PTR_TO_MAP_VALUE_OR_NULL
	 */
	"r0 = *(u64 *)(r0 + 0);"
	"r0 = 0;"
	"exit;"
	:
	: __imm(bpf_map_lookup_elem),
	  __imm_addr(array_map_8b)
	: __clobber_all);
}
1947 
1948 /*
1949  * Regression for keeping later helper args after a whole-stack fallback
1950  * on an earlier local arg.  The first bpf_snprintf() arg is a local
1951  * frame-derived pointer with offset-imprecise tracking (`fp1 ?`), which
1952  * conservatively marks the whole local stack live.  The fourth arg still
1953  * forwards &parent_fp-8 and must contribute nonlocal_use[0]=0:3.
1954  */
SEC("socket")
__log_level(2)
__success
__msg("call bpf_snprintf{{.*}}        ; use: fp1-8..-512 fp0-8")
__naked void helper_arg_fallback_keeps_scanning(void)
{
	asm volatile (
	"r1 = 42;"
	"*(u64 *)(r10 - 8) = r1;"	/* value that the subprog prints via &fp-8 */
	"r1 = r10;"
	"r1 += -8;"			/* r1 = &fp-8, forwarded to the subprog */
	"call helper_snprintf_parent_after_local_fallback;"
	"r0 = 0;"
	"exit;"
	::: __clobber_all);
}
1971 
static __used __naked void helper_snprintf_parent_after_local_fallback(void)
{
	asm volatile (
	"r6 = r1;"				/* save &parent_fp-8 */
	"call %[bpf_get_prandom_u32];"
	"r0 &= 8;"				/* r0 in {0, 8}: variable offset */
	"r1 = r10;"
	"r1 += -16;"
	"r1 += r0;"				/* local fp, offset-imprecise */
	"r2 = 8;"				/* output buffer size */
	"r3 = %[snprintf_u64_fmt] ll;"		/* "%llu" format string */
	"r4 = r6;"				/* later arg: parent fp-8 (must still be scanned) */
	"r5 = 8;"				/* data array size */
	"call %[bpf_snprintf];"
	"r0 = 0;"
	"exit;"
	:
	: __imm(bpf_get_prandom_u32),
	  __imm(bpf_snprintf),
	  __imm_addr(snprintf_u64_fmt)
	: __clobber_all);
}
1994 
1995 /*
1996  * Test that propagate_callee_ancestor() correctly chains ancestor
1997  * liveness across sequential calls within a single frame.
1998  *
1999  * main → mid_seq_touch → {nop_callee, deref_ancestor}
2000  *
2001  * mid_seq_touch receives two pointers: R1 = &main_fp-8 (forwarded to
2002  * deref_ancestor) and R2 = &main_fp-16 (read directly by mid_seq_touch).
2003  * The direct read of fp-16 forces ensure_anc_arrays() to allocate
2004  * ancestor_live[0] for mid_seq_touch, so refined_caller_live_stack()
2005  * uses the refined path (not the conservative fallback).
2006  *
2007  * mid_seq_touch calls nop_callee first (no-op, creates a pruning point),
2008  * then calls deref_ancestor which reads main's fp-8 and dereferences it.
2009  *
2010  * propagate_callee_ancestor() propagates deref_ancestor's entry
2011  * ancestor_live[0] into mid_seq_touch's anc_use[0] at the call-to-deref
2012  * instruction.  mid_seq_touch's backward pass flows this backward so
2013  * ancestor_live[0] includes fp-8 at the pruning point between the calls.
2014  *
2015  * Without propagation, mid_seq_touch's ancestor_live[0] only has fp-16
2016  * (from the direct read) — fp-8 is missing.  refined_caller_live_stack()
2017  * Term 1 says fp-8 is dead, the verifier cleans it, and path B
2018  * (scalar 42) is incorrectly pruned against path A (MAP_VALUE).
2019  *
2020  * Path A: main stores PTR_TO_MAP_VALUE at fp-8  → deref succeeds
2021  * Path B: main stores scalar 42 at fp-8         → deref must fail
2022  *
2023  * Correct: reject (path B dereferences scalar)
2024  */
SEC("socket")
__flag(BPF_F_TEST_STATE_FREQ)
__failure __msg("R0 invalid mem access 'scalar'")
__naked void propagate_callee_ancestor_chain(void)
{
	asm volatile (
	/* Map lookup to get PTR_TO_MAP_VALUE */
	"r1 = %[map] ll;"
	"*(u32 *)(r10 - 32) = 0;"	/* key = 0 at fp-32 */
	"r2 = r10;"
	"r2 += -32;"
	"call %[bpf_map_lookup_elem];"
	"if r0 == 0 goto l_exit%=;"
	"r6 = r0;"			/* r6 = PTR_TO_MAP_VALUE (non-null) */
	/* Branch: path A (fall-through) explored first */
	"call %[bpf_get_prandom_u32];"
	"if r0 != 0 goto l_scalar%=;"
	/* Path A: fp-8 = PTR_TO_MAP_VALUE */
	"*(u64 *)(r10 - 8) = r6;"
	"goto l_merge%=;"
"l_scalar%=:"
	/* Path B: fp-8 = scalar 42 */
	"r1 = 42;"
	"*(u64 *)(r10 - 8) = r1;"
"l_merge%=:"
	/* fp-16 = dummy value (mid_seq_touch reads it directly) */
	"r1 = 99;"
	"*(u64 *)(r10 - 16) = r1;"
	/* R1 = &fp-8 (for deref_ancestor), R2 = &fp-16 (for mid_seq_touch) */
	"r1 = r10;"
	"r1 += -8;"
	"r2 = r10;"
	"r2 += -16;"
	"call mid_seq_touch;"
	"r0 = 0;"
	"exit;"
"l_exit%=:"
	/* lookup miss: nothing to test */
	"r0 = 0;"
	"exit;"
	:
	: __imm(bpf_map_lookup_elem),
	  __imm(bpf_get_prandom_u32),
	  __imm_addr(map)
	: __clobber_all);
}
2070 
2071 /*
2072  * R1 = &main_fp-8 (forwarded to deref_ancestor)
2073  * R2 = &main_fp-16 (read directly here → allocates ancestor_live[0])
2074  *
2075  * Reads main's fp-16 to force ancestor_live[0] allocation, then
2076  * calls nop_callee (pruning point), then deref_ancestor.
2077  */
2078 static __used __naked void mid_seq_touch(void)
2079 {
2080 	asm volatile (
2081 	"r6 = r1;"			/* save &main_fp-8 in callee-saved */
2082 	"r0 = *(u64 *)(r2 + 0);"	/* read main's fp-16: triggers anc_use[0] */
2083 	"call nop_callee;"		/* no-op, creates pruning point after */
2084 	"r1 = r6;"			/* restore ptr to &main_fp-8 */
2085 	"call deref_ancestor;"		/* reads main's fp-8, dereferences */
2086 	"r0 = 0;"
2087 	"exit;"
2088 	::: __clobber_all);
2089 }
2090 
/* Does nothing; exists only so its callsite becomes a pruning point. */
static __used __naked void nop_callee(void)
{
	asm volatile (
	"r0 = 0;"
	"exit;"
	::: __clobber_all);
}
2098 
/* Reads main's fp-8 through forwarded pointer, dereferences result.
 * R1 = &main_fp-8; the deref is what distinguishes path A (map ptr,
 * accepted) from path B (scalar, rejected).
 */
static __used __naked void deref_ancestor(void)
{
	asm volatile (
	"r0 = *(u64 *)(r1 + 0);"	/* read main's fp-8 */
	"r0 = *(u64 *)(r0 + 0);"	/* deref: safe for map_ptr, unsafe for scalar */
	"r0 = 0;"
	"exit;"
	::: __clobber_all);
}
2109 
2110 /*
2111  * Test: callee loads an fp-derived pointer from caller's stack, then
2112  * reads through it to access another caller stack slot.
2113  *
2114  * main stores PTR_TO_MAP_VALUE at fp-24, stores &fp-24 (an fp-derived
2115  * pointer) at fp-8, passes &fp-8 through mid_fwd_spilled_ptr to
2116  * load_ptr_deref_grandchild.  The leaf loads the pointer from main's
2117  * fp-8, then reads main's fp-24 through the loaded pointer.
2118  *
2119  * fill_from_stack() in arg_track_xfer() only handles local-frame
2120  * FP-derived loads (src_is_local_fp check requires frame == depth).
2121  * When a callee loads from a parent-frame pointer (frame < depth),
2122  * the loaded value gets ARG_NONE instead of being recognized as
2123  * fp-derived.  Subsequent reads through that loaded pointer are
2124  * invisible to liveness — nonlocal_use is never set for fp-24.
2125  *
2126  * clean_live_states() cleans the current state at every prune point.
2127  * Because liveness misses fp-24, refined_caller_live_stack() tells
2128  * __clean_func_state() that fp-24 is dead, which destroys the
2129  * PTR_TO_MAP_VALUE spill before the grandchild can read it.
2130  * The grandchild then reads STACK_INVALID → scalar, and the deref
2131  * is rejected with "R0 invalid mem access 'scalar'" — even though
2132  * fp-24 is genuinely live and holds a valid map pointer.
2133  *
2134  * This is a false positive: a valid program incorrectly rejected.
2135  */
2136 SEC("socket")
2137 __flag(BPF_F_TEST_STATE_FREQ)
2138 __success
2139 __naked void spilled_fp_cross_frame_deref(void)
2140 {
2141 	asm volatile (
2142 	/* Map lookup to get PTR_TO_MAP_VALUE */
2143 	"r1 = %[map] ll;"
2144 	"*(u32 *)(r10 - 32) = 0;"
2145 	"r2 = r10;"
2146 	"r2 += -32;"
2147 	"call %[bpf_map_lookup_elem];"
2148 	"if r0 == 0 goto l_exit%=;"
2149 	/* fp-24 = PTR_TO_MAP_VALUE */
2150 	"*(u64 *)(r10 - 24) = r0;"
2151 	/* Store pointer to fp-24 at fp-8 */
2152 	"r1 = r10;"
2153 	"r1 += -24;"
2154 	"*(u64 *)(r10 - 8) = r1;"
2155 	/* R1 = &fp-8: pointer to the spilled ptr */
2156 	"r1 = r10;"
2157 	"r1 += -8;"
2158 	"call mid_fwd_spilled_ptr;"
2159 	"r0 = 0;"
2160 	"exit;"
2161 "l_exit%=:"
2162 	"r0 = 0;"
2163 	"exit;"
2164 	:
2165 	: __imm(bpf_map_lookup_elem),
2166 	  __imm_addr(map)
2167 	: __clobber_all);
2168 }
2169 
/* Forwards R1 (ptr to main's fp-8, which holds &main_fp-24) to leaf.
 * Naked function: R1 reaches the leaf untouched.
 */
static __used __naked void mid_fwd_spilled_ptr(void)
{
	asm volatile (
	"call load_ptr_deref_grandchild;"
	"r0 = 0;"
	"exit;"
	::: __clobber_all);
}
2179 
2180 /*
2181  * R1 = &main_fp-8 (where main stored ptr to fp-24)
2182  * Loads the ptr from main's fp-8, reads main's fp-24 through it,
2183  * then dereferences the result.
2184  */
2185 static __used __naked void load_ptr_deref_grandchild(void)
2186 {
2187 	asm volatile (
2188 	/* Load ptr from main's fp-8 → r2 = &main_fp-24 */
2189 	"r2 = *(u64 *)(r1 + 0);"
2190 	/* Read main's fp-24 through loaded ptr */
2191 	"r0 = *(u64 *)(r2 + 0);"
2192 	/* Dereference: safe for map_ptr */
2193 	"r0 = *(u64 *)(r0 + 0);"
2194 	"r0 = 0;"
2195 	"exit;"
2196 	::: __clobber_all);
2197 }
2198 
2199 /*
2200  * Exercise merge_nonlocal_live().
2201  *
2202  * merge_shared_mid is analyzed twice (once from each wrapper), so the
2203  * callsite within merge_shared_mid that calls merge_leaf_read gets its
2204  * nonlocal_live info merged twice via merge_nonlocal_live().
2205  */
2206 SEC("socket")
2207 __log_level(2)
2208 __success
2209 __msg("14: (85) call pc+2	r1: fp0-16")
2210 __msg("17: (79) r0 = *(u64 *)(r1 +0) // r1=fp0-16")
2211 __msg("14: (85) call pc+2	r1: fp0-8")
2212 __msg("17: (79) r0 = *(u64 *)(r1 +0) // r1=fp0-8")
2213 __msg("5: (85) call pc+{{.*}}                   ; use: fp0-8 fp0-16")
2214 __naked void test_merge_nonlocal_live(void)
2215 {
2216 	asm volatile (
2217 	"r1 = 0;"
2218 	"*(u64 *)(r10 - 8) = r1;"
2219 	"*(u64 *)(r10 - 16) = r1;"
2220 	"r1 = r10;"
2221 	"r1 += -8;"
2222 	"call merge_wrapper_a;"
2223 	"r1 = r10;"
2224 	"r1 += -16;"
2225 	"call merge_wrapper_b;"
2226 	"r0 = 0;"
2227 	"exit;"
2228 	::: __clobber_all);
2229 }
2230 
/* First of two distinct callers of merge_shared_mid; forwards R1 as-is. */
static __used __naked void merge_wrapper_a(void)
{
	asm volatile (
	"call merge_shared_mid;"
	"r0 = 0;"
	"exit;"
	::: __clobber_all);
}
2239 
/* Second caller of merge_shared_mid; forwards R1 as-is. */
static __used __naked void merge_wrapper_b(void)
{
	asm volatile (
	"call merge_shared_mid;"
	"r0 = 0;"
	"exit;"
	::: __clobber_all);
}
2248 
/* Shared middle subprog: its single callsite into merge_leaf_read is
 * reached from both wrappers, forcing a nonlocal_live merge there.
 */
static __used __naked void merge_shared_mid(void)
{
	asm volatile (
	"call merge_leaf_read;"
	"r0 = 0;"
	"exit;"
	::: __clobber_all);
}
2257 
/* Reads one u64 through the forwarded parent-stack pointer in R1. */
static __used __naked void merge_leaf_read(void)
{
	asm volatile (
	"r0 = *(u64 *)(r1 + 0);"
	"r0 = 0;"
	"exit;"
	::: __clobber_all);
}
2266 
/* Same bpf_loop instruction calls different callbacks depending on branch.
 * Liveness must account for both possible callbacks at the one callsite.
 */
SEC("socket")
__log_level(2)
__success
__msg("call bpf_loop#181            ; use: fp2-8..-512 fp1-8..-512 fp0-8..-512")
__naked void bpf_loop_two_callbacks(void)
{
	asm volatile (
	"r1 = 0;"
	"*(u64 *)(r10 - 8) = r1;"	/* zero-init slots the callbacks may read */
	"*(u64 *)(r10 - 16) = r1;"
	"r1 = r10;"
	"r1 += -8;"
	"call dyn_wrapper_a;"		/* ctx chain ends at fp-8 */
	"r1 = r10;"
	"r1 += -16;"
	"call dyn_wrapper_b;"		/* ctx chain ends at fp-16 */
	"r0 = 0;"
	"exit;"
	::: __clobber_all);
}
2288 
/* First caller of mid_dynamic_cb; forwards R1 as-is. */
static __used __naked void dyn_wrapper_a(void)
{
	asm volatile (
	"call mid_dynamic_cb;"
	"r0 = 0;"
	"exit;"
	::: __clobber_all);
}
2297 
/* Second caller of mid_dynamic_cb; forwards R1 as-is. */
static __used __naked void dyn_wrapper_b(void)
{
	asm volatile (
	"call mid_dynamic_cb;"
	"r0 = 0;"
	"exit;"
	::: __clobber_all);
}
2306 
/* Invokes bpf_loop with one of two callbacks, picked by a runtime
 * branch, passing the parent-stack pointer from R1 as callback ctx.
 */
static __used __naked void mid_dynamic_cb(void)
{
	asm volatile (
	"r6 = r1;"			/* save parent stack ptr across helper call */
	"call %[bpf_get_prandom_u32];"
	"if r0 == 0 goto 1f;"
	"r2 = dyn_cb_a ll;"		/* callback selected at runtime */
	"goto 2f;"
	"1:"
	"r2 = dyn_cb_b ll;"
	"2:"
	"r1 = 1;"			/* nr_loops = 1 */
	"r3 = r6;" /* ctx = fp-derived ptr from parent */
	"r4 = 0;"			/* flags = 0 */
	"call %[bpf_loop];"
	"r0 = 0;"
	"exit;"
	:: __imm(bpf_get_prandom_u32),
	   __imm(bpf_loop)
	: __clobber_all);
}
2328 
/* Callback A/B: read parent stack through ctx */
static __used __naked void dyn_cb_a(void)
{
	asm volatile (
	"r0 = *(u64 *)(r2 + 0);"	/* r2 = ctx = ptr into an ancestor frame */
	"r0 = 0;"
	"exit;"
	::: __clobber_all);
}
2338 
/* Identical body to dyn_cb_a; only the identity of the callback differs. */
static __used __naked void dyn_cb_b(void)
{
	asm volatile (
	"r0 = *(u64 *)(r2 + 0);"	/* r2 = ctx = ptr into an ancestor frame */
	"r0 = 0;"
	"exit;"
	::: __clobber_all);
}
2347 
2348 /*
2349  * Path A: r0 = map_lookup result (non-FP, ARG_NONE for stack tracking)
2350  * Path B: r0 = fp-8 (FP-derived, frame=0, off=-8)
2351  * At the join: r0 is not guaranteed to be a frame pointer.
2352  */
2353 SEC("socket")
2354 __log_level(2)
2355 __msg("10: (79) r0 = *(u64 *)(r10 -8) // r0=fp0-8|fp0+0")
2356 __naked void stack_or_non_stack_write(void)
2357 {
2358 	asm volatile (
2359 	/* initial write to fp-8 */
2360 	"*(u64 *)(r10 - 8) = 0;"
2361 	/* map lookup to get a non-FP pointer */
2362 	"r2 = r10;"
2363 	"r2 += -4;"
2364 	"r1 = %[map] ll;"
2365 	"call %[bpf_map_lookup_elem];"
2366 	/* r0 = map_value (ARG_NONE) */
2367 	"if r0 != 0 goto 1f;"
2368 	/* path B: r0 = fp-8 */
2369 	"r0 = r10;"
2370 	"r0 += -8;"
2371 "1:"
2372 	/* join: the write is not a def for fp[0]-8 */
2373 	"*(u64 *)(r0 + 0) = 7;"
2374 	/* read fp-8: should be non-poisoned */
2375 	"r0 = *(u64 *)(r10 - 8);"
2376 	"exit;"
2377 	:
2378 	: __imm(bpf_map_lookup_elem),
2379 	  __imm_addr(map)
2380 	: __clobber_all);
2381 }
2382 
/* Two calls reach write_first_read_second through the same forwarding
 * subprog with the fp-8/fp-16 roles swapped, so the shared instance
 * must treat both slots as used, not let one call's must-write mask
 * shadow the other call's read.
 */
SEC("socket")
__log_level(2)
__flag(BPF_F_TEST_STATE_FREQ)
__msg("subprog#2 write_first_read_second:")
__msg("17: (7a) *(u64 *)(r1 +0) = 42{{$}}")
__msg("18: (79) r0 = *(u64 *)(r2 +0) // r1=fp0-8 r2=fp0-16{{$}}")
__msg("stack use/def subprog#2 write_first_read_second (d2,cs15):")
__msg("17: (7a) *(u64 *)(r1 +0) = 42{{$}}")
__msg("18: (79) r0 = *(u64 *)(r2 +0)         ; use: fp0-8 fp0-16")
__naked void shared_instance_must_write_overwrite(void)
{
	asm volatile (
	"r1 = 1;"
	"*(u64 *)(r10 - 8) = r1;"	/* init both slots before forwarding */
	"*(u64 *)(r10 - 16) = r1;"
	/* Call 1: write_first_read_second(&fp[-8], &fp[-16]) */
	"r1 = r10;"
	"r1 += -8;"
	"r2 = r10;"
	"r2 += -16;"
	"call forwarding_rw;"
	/* Call 2: write_first_read_second(&fp[-16], &fp[-8]) */
	"r1 = r10;"
	"r1 += -16;"
	"r2 = r10;"
	"r2 += -8;"
	"call forwarding_rw;"
	"r0 = 0;"
	"exit;"
	::: __clobber_all);
}
2414 
/* Forwards R1 and R2 unchanged to write_first_read_second. */
static __used __naked void forwarding_rw(void)
{
	asm volatile (
	"call write_first_read_second;"
	"exit;"
	::: __clobber_all);
}
2422 
/* Writes 42 through R1, then reads through R2; both point into an
 * ancestor frame when reached via forwarding_rw.
 */
static __used __naked void write_first_read_second(void)
{
	asm volatile (
	"*(u64 *)(r1 + 0) = 42;"
	"r0 = *(u64 *)(r2 + 0);"
	"exit;"
	::: __clobber_all);
}
2431 
2432 /*
2433  * Shared must_write when (callsite, depth) instance is reused.
2434  * Main calls fwd_to_stale_wr at two sites. fwd_to_stale_wr calls
2435  * stale_wr_leaf at a single internal callsite. Both calls share
2436  * stale_wr_leaf's (callsite, depth) instance.
2437  *
2438  * Call 1: stale_wr_leaf(map_value, fp-8) writes map, reads fp-8.
2439  * Call 2: stale_wr_leaf(fp-8, fp-8) writes fp-8, reads fp-8.
2440  *
2441  * The analysis can't presume that stale_wr_leaf() always writes fp-8,
2442  * it must conservatively join must_write masks computed for both calls.
2443  */
2444 SEC("socket")
2445 __success
2446 __naked void stale_must_write_cross_callsite(void)
2447 {
2448 	asm volatile (
2449 	"*(u64 *)(r10 - 8) = 0;"
2450 	/* Call 1: map_value write, fp-8 read (processed second in PO) */
2451 	"*(u32 *)(r10 - 16) = 0;"
2452 	"r1 = %[map] ll;"
2453 	"r2 = r10;"
2454 	"r2 += -16;"
2455 	"call %[bpf_map_lookup_elem];"
2456 	"if r0 == 0 goto 1f;"
2457 	"r1 = r0;"
2458 	"r2 = r10;"
2459 	"r2 += -8;"
2460 	"call fwd_to_stale_wr;"
2461 	/* Call 2: fp-8 write, fp-8 read (processed first in PO) */
2462 	"r1 = r10;"
2463 	"r1 += -8;"
2464 	"r2 = r1;"
2465 	"call fwd_to_stale_wr;"
2466 "1:"
2467 	"r0 = 0;"
2468 	"exit;"
2469 	:: __imm_addr(map),
2470 	   __imm(bpf_map_lookup_elem)
2471 	: __clobber_all);
2472 }
2473 
/* Forwards R1 and R2 unchanged; provides the single shared callsite
 * into stale_wr_leaf.
 */
static __used __naked void fwd_to_stale_wr(void)
{
	asm volatile (
	"call stale_wr_leaf;"
	"exit;"
	::: __clobber_all);
}
2481 
/* Writes 42 through R1 (map value or fp-8, caller-dependent), reads
 * through R2 (always fp-8).
 */
static __used __naked void stale_wr_leaf(void)
{
	asm volatile (
	"*(u64 *)(r1 + 0) = 42;"
	"r0 = *(u64 *)(r2 + 0);"
	"exit;"
	::: __clobber_all);
}
2490 
2491 #ifdef CAN_USE_LOAD_ACQ_STORE_REL
2492 
SEC("socket")
__log_level(2)
__success
__msg("*(u64 *)(r0 +0) = 42         ; def: fp0-16")
__naked void load_acquire_dont_clear_dst(void)
{
	asm volatile (
	"r0 = r10;"
	"r0 += -16;"
	"*(u64 *)(r0 + 0) = r0;"	/* fp[-16] == &fp[-16] */
	/* r0 = load_acquire((u64 *)(r0 + 0)), emitted as a raw insn below */
	".8byte %[load_acquire_insn];"	/* load_acquire is a special case for BPF_STX, */
	"r0 = *(u64 *)(r10 - 16);"	/* it shouldn't clear tracking info for */
	"*(u64 *)(r0 + 0) = 42;"	/* dst register, r0 in this case. */
	"r0 = 0;"
	"exit;"
	:
	: __imm_insn(load_acquire_insn,
		     BPF_ATOMIC_OP(BPF_DW, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_0, 0))
	: __clobber_all);
}
2513 
2514 #endif /* CAN_USE_LOAD_ACQ_STORE_REL */
2515 
/* Driver: passes &fp-8 (holding 0) to imprecise_fill_cross_frame,
 * which reads it back through an imprecisely-tracked pointer.
 */
SEC("socket")
__success
__naked void imprecise_fill_loses_cross_frame(void)
{
	asm volatile (
	"*(u64 *)(r10 - 8) = 0;"	/* value the callee reads back */
	"r1 = r10;"
	"r1 += -8;"			/* r1 = &fp-8 */
	"call imprecise_fill_cross_frame;"
	"exit;"
	::: __clobber_all);
}
2528 
2529 static __used __naked void imprecise_fill_cross_frame(void)
2530 {
2531 	asm volatile (
2532 	/* spill &caller_fp-8 to callee's fp-8 */
2533 	"*(u64 *)(r10 - 8) = r1;"
2534 	/* imprecise FP pointer in r1 */
2535 	"r1 = r10;"
2536 	"r2 = -8;"
2537 	"r1 += r2;"
2538 	/* load from imprecise offset. fill_from_stack returns
2539 	 * ARG_IMPRECISE{mask=BIT(1)}, losing frame 0
2540 	 */
2541 	"r1 = *(u64 *)(r1 + 0);"
2542 	/* read caller's fp-8 through loaded pointer, should mark fp0-8 live */
2543 	"r0 = *(u64 *)(r1 + 0);"
2544 	"r0 = 0;"
2545 	"exit;"
2546 	:: __imm(bpf_get_prandom_u32)
2547 	: __clobber_all);
2548 }
2549 
2550 /* Test that spill_to_stack with multi-offset dst (sz=8) joins instead
2551  * of overwriting. r1 has offsets [-8, -16]. Both slots hold FP-derived
2552  * pointers. Writing through r1 should join *val with existing values,
2553  * not destroy them.
2554  *
2555  *   fp-8  = &fp-24
2556  *   fp-16 = &fp-32
2557  *   r1 = fp-8 or fp-16 (two offsets from branch)
2558  *   *(u64 *)(r1 + 0) = &fp-24   -- writes to one slot, other untouched
2559  *   r0 = *(u64 *)(r10 - 16)     -- fill from fp-16
2560  *   r0 = *(u64 *)(r0 + 0)       -- deref: should produce use
2561  */
2562 SEC("socket")
2563 __log_level(2)
2564 __success
2565 __msg("20: (79) r0 = *(u64 *)(r10 -16)")
2566 __msg("21: (79) r0 = *(u64 *)(r0 +0)         ; use: fp0-24 fp0-32")
2567 __naked void spill_join_with_multi_off(void)
2568 {
2569 	asm volatile (
2570 	/* fp-8 = &fp-24, fp-16 = &fp-32 (different pointers) */
2571 	"*(u64 *)(r10 - 24) = 0;"
2572 	"*(u64 *)(r10 - 32) = 0;"
2573 	"r1 = r10;"
2574 	"r1 += -24;"
2575 	"*(u64 *)(r10 - 8) = r1;"
2576 	"r1 = r10;"
2577 	"r1 += -32;"
2578 	"*(u64 *)(r10 - 16) = r1;"
2579 	/* create r1 with two candidate offsets: fp-8 or fp-16 */
2580 	"call %[bpf_get_prandom_u32];"
2581 	"if r0 == 0 goto 1f;"
2582 	"r1 = r10;"
2583 	"r1 += -8;"
2584 	"goto 2f;"
2585 "1:"
2586 	"r1 = r10;"
2587 	"r1 += -16;"
2588 "2:"
2589 	/* write &fp-24 through multi-offset r1: hits one slot, other untouched */
2590 	"r2 = r10;"
2591 	"r2 += -24;"
2592 	"*(u64 *)(r1 + 0) = r2;"
2593 	/* read back *fp-8 and *fp-16 */
2594 	"r0 = *(u64 *)(r10 - 8);"
2595 	"r0 = *(u64 *)(r0 + 0);"
2596 	"r0 = *(u64 *)(r10 - 16);"
2597 	"r0 = *(u64 *)(r0 + 0);"
2598 	"exit;"
2599 	:: __imm(bpf_get_prandom_u32)
2600 	: __clobber_all);
2601 }
2602 
2603 /* Test that spill_to_stack with imprecise dst (off_cnt == 0, sz=8)
2604  * joins instead of overwriting. Use "r2 = -8; r1 += r2" to make
2605  * arg tracking lose offset precision while the main verifier keeps
2606  * r1 as PTR_TO_STACK with fixed offset. Both slots hold FP-derived
2607  * pointers. Writing through r1 should join *val with existing
2608  * values, not destroy them.
2609  *
2610  *   fp-8  = &fp-24
2611  *   fp-16 = &fp-32
2612  *   r1 = fp-8 (imprecise to arg tracking)
2613  *   *(u64 *)(r1 + 0) = &fp-24   -- since r1 is imprecise, this adds &fp-24
2614  *                                  to the set of possible values for all slots,
2615  *                                  hence the values at fp-16 become [fp-24, fp-32]
2616  *   r0 = *(u64 *)(r10 - 16)
2617  *   r0 = *(u64 *)(r0 + 0)       -- deref: should produce use of fp-24 or fp-32
2618  */
2619 SEC("socket")
2620 __log_level(2)
2621 __success
2622 __msg("15: (79) r0 = *(u64 *)(r0 +0)         ; use: fp0-24 fp0-32")
2623 __naked void spill_join_with_imprecise_off(void)
2624 {
2625 	asm volatile (
2626 	"*(u64 *)(r10 - 24) = 0;"
2627 	"*(u64 *)(r10 - 32) = 0;"
2628 	"r1 = r10;"
2629 	"r1 += -24;"
2630 	"*(u64 *)(r10 - 8) = r1;"
2631 	"r1 = r10;"
2632 	"r1 += -32;"
2633 	"*(u64 *)(r10 - 16) = r1;"
2634 	/* r1 = fp-8 but arg tracking sees off_cnt == 0 */
2635 	"r1 = r10;"
2636 	"r2 = -8;"
2637 	"r1 += r2;"
2638 	/* write through imprecise r1 */
2639 	"r3 = r10;"
2640 	"r3 += -24;"
2641 	"*(u64 *)(r1 + 0) = r3;"
2642 	/* read back fp-16: at_stack should still track &fp-32 */
2643 	"r0 = *(u64 *)(r10 - 16);"
2644 	/* deref: should produce use for fp-32 */
2645 	"r0 = *(u64 *)(r0 + 0);"
2646 	"r0 = 0;"
2647 	"exit;"
2648 	::: __clobber_all);
2649 }
2650