xref: /linux/kernel/bpf/verifier.c (revision 5e0b273e0a62cc04ec338c7b502797c66c2ed42a)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
3  * Copyright (c) 2016 Facebook
4  * Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
5  */
6 #include <uapi/linux/btf.h>
7 #include <linux/bpf-cgroup.h>
8 #include <linux/kernel.h>
9 #include <linux/types.h>
10 #include <linux/slab.h>
11 #include <linux/bpf.h>
12 #include <linux/btf.h>
13 #include <linux/bpf_verifier.h>
14 #include <linux/filter.h>
15 #include <net/netlink.h>
16 #include <linux/file.h>
17 #include <linux/vmalloc.h>
18 #include <linux/stringify.h>
19 #include <linux/bsearch.h>
20 #include <linux/sort.h>
21 #include <linux/perf_event.h>
22 #include <linux/ctype.h>
23 #include <linux/error-injection.h>
24 #include <linux/bpf_lsm.h>
25 #include <linux/btf_ids.h>
26 #include <linux/poison.h>
27 #include <linux/module.h>
28 #include <linux/cpumask.h>
29 #include <linux/cnum.h>
30 #include <linux/bpf_mem_alloc.h>
31 #include <net/xdp.h>
32 #include <linux/trace_events.h>
33 #include <linux/kallsyms.h>
34 
35 #include "disasm.h"
36 
/*
 * Verifier ops lookup table. Each BPF_PROG_TYPE(_id, _name, ...) entry
 * listed in <linux/bpf_types.h> expands to a designated initializer that
 * stores &<_name>_verifier_ops at index _id. BPF_MAP_TYPE() and
 * BPF_LINK_TYPE() are defined empty here so those entries contribute
 * nothing, and all three macros are undefined again after the include.
 */
static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
	[_id] = & _name ## _verifier_ops,
#define BPF_MAP_TYPE(_id, _ops)
#define BPF_LINK_TYPE(_id, _name)
#include <linux/bpf_types.h>
#undef BPF_PROG_TYPE
#undef BPF_MAP_TYPE
#undef BPF_LINK_TYPE
};
47 
/*
 * Feature identifiers. __MAX_BPF_FEAT is the enumerator that follows the
 * last feature, so its value equals the number of features listed above it.
 * NOTE(review): confirm where these ids are consumed.
 */
enum bpf_features {
	BPF_FEAT_RDONLY_CAST_TO_VOID = 0,
	BPF_FEAT_STREAMS	     = 1,
	__MAX_BPF_FEAT,
};
53 
/*
 * A non-static allocator object and a file-local flag.
 * NOTE(review): presumably the flag records that bpf_global_percpu_ma has
 * been set up -- confirm against the code that writes it.
 */
struct bpf_mem_alloc bpf_global_percpu_ma;
static bool bpf_global_percpu_ma_set;
56 
57 /* bpf_check() is a static code analyzer that walks eBPF program
58  * instruction by instruction and updates register/stack state.
59  * All paths of conditional branches are analyzed until 'bpf_exit' insn.
60  *
61  * The first pass is depth-first-search to check that the program is a DAG.
62  * It rejects the following programs:
63  * - larger than BPF_MAXINSNS insns
64  * - if loop is present (detected via back-edge)
65  * - unreachable insns exist (shouldn't be a forest. program = one function)
66  * - out of bounds or malformed jumps
67  * The second pass is all possible path descent from the 1st insn.
68  * Since it's analyzing all paths through the program, the length of the
69  * analysis is limited to 64k insn, which may be hit even if total number of
 * insn is less than 4K, but there are too many branches that change stack/regs.
 * Number of 'branches to be analyzed' is limited to 1k.
72  *
73  * On entry to each instruction, each register has a type, and the instruction
74  * changes the types of the registers depending on instruction semantics.
75  * If instruction is BPF_MOV64_REG(BPF_REG_1, BPF_REG_5), then type of R5 is
76  * copied to R1.
77  *
78  * All registers are 64-bit.
79  * R0 - return register
80  * R1-R5 argument passing registers
81  * R6-R9 callee saved registers
82  * R10 - frame pointer read-only
83  *
84  * At the start of BPF program the register R1 contains a pointer to bpf_context
85  * and has type PTR_TO_CTX.
86  *
87  * Verifier tracks arithmetic operations on pointers in case:
88  *    BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
89  *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -20),
90  * 1st insn copies R10 (which has FRAME_PTR) type into R1
91  * and 2nd arithmetic instruction is pattern matched to recognize
92  * that it wants to construct a pointer to some element within stack.
93  * So after 2nd insn, the register R1 has type PTR_TO_STACK
94  * (and -20 constant is saved for further stack bounds checking).
95  * Meaning that this reg is a pointer to stack plus known immediate constant.
96  *
97  * Most of the time the registers have SCALAR_VALUE type, which
98  * means the register has some value, but it's not a valid pointer.
99  * (like pointer plus pointer becomes SCALAR_VALUE type)
100  *
101  * When verifier sees load or store instructions the type of base register
102  * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK, PTR_TO_SOCKET. These are
103  * four pointer types recognized by check_mem_access() function.
104  *
105  * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
106  * and the range of [ptr, ptr + map's value_size) is accessible.
107  *
108  * registers used to pass values to function calls are checked against
109  * function argument constraints.
110  *
111  * ARG_PTR_TO_MAP_KEY is one of such argument constraints.
112  * It means that the register type passed to this function must be
113  * PTR_TO_STACK and it will be used inside the function as
114  * 'pointer to map element key'
115  *
116  * For example the argument constraints for bpf_map_lookup_elem():
117  *   .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
118  *   .arg1_type = ARG_CONST_MAP_PTR,
119  *   .arg2_type = ARG_PTR_TO_MAP_KEY,
120  *
121  * ret_type says that this function returns 'pointer to map elem value or null'
122  * function expects 1st argument to be a const pointer to 'struct bpf_map' and
123  * 2nd argument should be a pointer to stack, which will be used inside
124  * the helper function as a pointer to map element key.
125  *
126  * On the kernel side the helper function looks like:
127  * u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
128  * {
129  *    struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
130  *    void *key = (void *) (unsigned long) r2;
131  *    void *value;
132  *
133  *    here kernel can access 'key' and 'map' pointers safely, knowing that
134  *    [key, key + map->key_size) bytes are valid and were initialized on
135  *    the stack of eBPF program.
136  * }
137  *
138  * Corresponding eBPF program may look like:
139  *    BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),  // after this insn R2 type is FRAME_PTR
140  *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), // after this insn R2 type is PTR_TO_STACK
141  *    BPF_LD_MAP_FD(BPF_REG_1, map_fd),      // after this insn R1 type is CONST_PTR_TO_MAP
142  *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
143  * here verifier looks at prototype of map_lookup_elem() and sees:
144  * .arg1_type == ARG_CONST_MAP_PTR and R1->type == CONST_PTR_TO_MAP, which is ok,
145  * Now verifier knows that this map has key of R1->map_ptr->key_size bytes
146  *
147  * Then .arg2_type == ARG_PTR_TO_MAP_KEY and R2->type == PTR_TO_STACK, ok so far,
148  * Now verifier checks that [R2, R2 + map's key_size) are within stack limits
149  * and were initialized prior to this call.
150  * If it's ok, then verifier allows this BPF_CALL insn and looks at
151  * .ret_type which is RET_PTR_TO_MAP_VALUE_OR_NULL, so it sets
152  * R0->type = PTR_TO_MAP_VALUE_OR_NULL which means bpf_map_lookup_elem() function
153  * returns either pointer to map value or NULL.
154  *
155  * When type PTR_TO_MAP_VALUE_OR_NULL passes through 'if (reg != 0) goto +off'
156  * insn, the register holding that pointer in the true branch changes state to
157  * PTR_TO_MAP_VALUE and the same register changes state to CONST_IMM in the false
158  * branch. See check_cond_jmp_op().
159  *
160  * After the call R0 is set to return type of the function and registers R1-R5
161  * are set to NOT_INIT to indicate that they are no longer readable.
162  *
163  * The following reference types represent a potential reference to a kernel
164  * resource which, after first being allocated, must be checked and freed by
165  * the BPF program:
166  * - PTR_TO_SOCKET_OR_NULL, PTR_TO_SOCKET
167  *
168  * When the verifier sees a helper call return a reference type, it allocates a
169  * pointer id for the reference and stores it in the current function state.
170  * Similar to the way that PTR_TO_MAP_VALUE_OR_NULL is converted into
171  * PTR_TO_MAP_VALUE, PTR_TO_SOCKET_OR_NULL becomes PTR_TO_SOCKET when the type
172  * passes through a NULL-check conditional. For the branch wherein the state is
173  * changed to CONST_IMM, the verifier releases the reference.
174  *
175  * For each helper function that allocates a reference, such as
176  * bpf_sk_lookup_tcp(), there is a corresponding release function, such as
177  * bpf_sk_release(). When a reference type passes into the release function,
178  * the verifier also releases the reference. If any unchecked or unreleased
179  * reference remains at the end of the program, the verifier rejects it.
180  */
181 
/* verifier_state + insn_idx are pushed to stack when branch is encountered */
struct bpf_verifier_stack_elem {
	/* verifier state is 'st'
	 * before processing instruction 'insn_idx'
	 * and after processing instruction 'prev_insn_idx'
	 */
	struct bpf_verifier_state st;
	int insn_idx;
	int prev_insn_idx;
	/* NOTE(review): presumably links to the element pushed before this one */
	struct bpf_verifier_stack_elem *next;
	/* length of verifier log at the time this state was pushed on stack */
	u32 log_pos;
};
195 
/*
 * Verifier limits and sizes.
 * NOTE(review): the units and the places where these are enforced should be
 * confirmed against their users; nothing here beyond the values is implied.
 */
#define BPF_COMPLEXITY_LIMIT_JMP_SEQ	8192
#define BPF_COMPLEXITY_LIMIT_STATES	64

#define BPF_GLOBAL_PERCPU_MA_MAX_SIZE  512

#define BPF_PRIV_STACK_MIN_SIZE		64
202 
/*
 * Forward declarations, so that these helpers can be called ahead of the
 * point where they are defined.
 */
static int acquire_reference(struct bpf_verifier_env *env, int insn_idx, int parent_id);
static int release_reference_nomark(struct bpf_verifier_state *state, int id);
static int release_reference(struct bpf_verifier_env *env, int id);
static void invalidate_non_owning_refs(struct bpf_verifier_env *env);
static bool in_rbtree_lock_required_cb(struct bpf_verifier_env *env);
static bool is_tracing_prog_type(enum bpf_prog_type type);
static int ref_set_non_owning(struct bpf_verifier_env *env,
			      struct bpf_reg_state *reg);
static bool is_trusted_reg(struct bpf_verifier_env *env, const struct bpf_reg_state *reg);
static inline bool in_sleepable_context(struct bpf_verifier_env *env);
static const char *non_sleepable_context_description(struct bpf_verifier_env *env);
static void scalar32_min_max_add(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg);
static void scalar_min_max_add(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg);
216 
217 static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux,
218 			      struct bpf_map *map,
219 			      bool unpriv, bool poison)
220 {
221 	unpriv |= bpf_map_ptr_unpriv(aux);
222 	aux->map_ptr_state.unpriv = unpriv;
223 	aux->map_ptr_state.poison = poison;
224 	aux->map_ptr_state.map_ptr = map;
225 }
226 
227 static void bpf_map_key_store(struct bpf_insn_aux_data *aux, u64 state)
228 {
229 	bool poisoned = bpf_map_key_poisoned(aux);
230 
231 	aux->map_key_state = state | BPF_MAP_KEY_SEEN |
232 			     (poisoned ? BPF_MAP_KEY_POISON : 0ULL);
233 }
234 
235 static void update_ref_obj(struct ref_obj_desc *ref_obj, struct bpf_reg_state *reg)
236 {
237 	ref_obj->id = reg->id;
238 	ref_obj->parent_id = reg->parent_id;
239 	ref_obj->cnt++;
240 }
241 
242 static int validate_ref_obj(struct bpf_verifier_env *env, struct ref_obj_desc *ref_obj)
243 {
244 	if (ref_obj->cnt > 1) {
245 		verifier_bug(env, "function expects only one referenced object but got %d\n",
246 			     ref_obj->cnt);
247 		return -EFAULT;
248 	}
249 
250 	return 0;
251 }
252 
/*
 * Per-call argument metadata.
 * NOTE(review): the fields are filled in and consumed by argument-checking
 * code defined elsewhere; their meanings are inferred from names only and
 * should be confirmed there.
 */
struct bpf_call_arg_meta {
	struct bpf_map_desc map;
	struct bpf_dynptr_desc dynptr;
	struct ref_obj_desc ref_obj;
	bool raw_mode;
	bool pkt_access;
	u8 release_regno;
	int regno;
	int access_size;
	int mem_size;
	u64 msize_max_value;
	int func_id;
	struct btf *btf;
	u32 btf_id;
	struct btf *ret_btf;
	u32 ret_btf_id;
	u32 subprogno;
	struct btf_field *kptr_field;
	s64 const_map_key;
};
273 
/*
 * Description of a kfunc.
 * NOTE(review): presumably @btf is the BTF object the kfunc belongs to,
 * @proto its function prototype type, @name its name, @flags its kfunc
 * flags and @id its BTF id -- confirm against the code that fills this in.
 */
struct bpf_kfunc_meta {
	struct btf *btf;
	const struct btf_type *proto;
	const char *name;
	const u32 *flags;
	s32 id;
};
281 
/*
 * Global (non-static) BTF object pointer.
 * NOTE(review): by its name this refers to the BTF of vmlinux -- confirm
 * where it is assigned.
 */
struct btf *btf_vmlinux;
283 
/*
 * Wrapper around an int that encodes either a register number or an
 * argument number: argno_from_reg() stores the register number as is,
 * argno_from_arg() stores the argument number negated.
 */
typedef struct argno {
	int argno;
} argno_t;
287 
288 static argno_t argno_from_reg(u32 regno)
289 {
290 	return (argno_t){ .argno = regno };
291 }
292 
293 static argno_t argno_from_arg(u32 arg)
294 {
295 	return (argno_t){ .argno = -arg };
296 }
297 
298 static int reg_from_argno(argno_t a)
299 {
300 	if (a.argno >= 0)
301 		return a.argno;
302 	if (a.argno >= -MAX_BPF_FUNC_REG_ARGS)
303 		return -a.argno;
304 	return -1;
305 }
306 
307 static int arg_from_argno(argno_t a)
308 {
309 	if (a.argno < 0)
310 		return -a.argno;
311 	return -1;
312 }
313 
314 static int arg_idx_from_argno(argno_t a)
315 {
316 	return arg_from_argno(a) - 1;
317 }
318 
319 static const char *btf_type_name(const struct btf *btf, u32 id)
320 {
321 	return btf_name_by_offset(btf, btf_type_by_id(btf, id)->name_off);
322 }
323 
/*
 * File-local mutexes.
 * NOTE(review): what each one protects should be confirmed at its lock/unlock
 * sites.
 */
static DEFINE_MUTEX(bpf_verifier_lock);
static DEFINE_MUTEX(bpf_percpu_ma_lock);
326 
327 __printf(2, 3) static void verbose(void *private_data, const char *fmt, ...)
328 {
329 	struct bpf_verifier_env *env = private_data;
330 	va_list args;
331 
332 	if (!bpf_verifier_log_needed(&env->log))
333 		return;
334 
335 	va_start(args, fmt);
336 	bpf_verifier_vlog(&env->log, fmt, args);
337 	va_end(args);
338 }
339 
340 static void verbose_invalid_scalar(struct bpf_verifier_env *env,
341 				   struct bpf_reg_state *reg,
342 				   struct bpf_retval_range range, const char *ctx,
343 				   const char *reg_name)
344 {
345 	bool unknown = true;
346 
347 	verbose(env, "%s the register %s has", ctx, reg_name);
348 	if (reg_smin(reg) > S64_MIN) {
349 		verbose(env, " smin=%lld", reg_smin(reg));
350 		unknown = false;
351 	}
352 	if (reg_smax(reg) < S64_MAX) {
353 		verbose(env, " smax=%lld", reg_smax(reg));
354 		unknown = false;
355 	}
356 	if (unknown)
357 		verbose(env, " unknown scalar value");
358 	verbose(env, " should have been in [%d, %d]\n", range.minval, range.maxval);
359 }
360 
361 static bool reg_not_null(struct bpf_verifier_env *env, const struct bpf_reg_state *reg)
362 {
363 	enum bpf_reg_type type;
364 
365 	type = reg->type;
366 	if (type_may_be_null(type))
367 		return false;
368 
369 	type = base_type(type);
370 	return type == PTR_TO_SOCKET ||
371 		type == PTR_TO_TCP_SOCK ||
372 		type == PTR_TO_MAP_VALUE ||
373 		type == PTR_TO_MAP_KEY ||
374 		type == PTR_TO_SOCK_COMMON ||
375 		(type == PTR_TO_BTF_ID && is_trusted_reg(env, reg)) ||
376 		(type == PTR_TO_MEM && !(reg->type & PTR_UNTRUSTED)) ||
377 		type == CONST_PTR_TO_MAP;
378 }
379 
380 static struct btf_record *reg_btf_record(const struct bpf_reg_state *reg)
381 {
382 	struct btf_record *rec = NULL;
383 	struct btf_struct_meta *meta;
384 
385 	if (reg->type == PTR_TO_MAP_VALUE) {
386 		rec = reg->map_ptr->record;
387 	} else if (type_is_ptr_alloc_obj(reg->type)) {
388 		meta = btf_find_struct_meta(reg->btf, reg->btf_id);
389 		if (meta)
390 			rec = meta->record;
391 	}
392 	return rec;
393 }
394 
395 bool bpf_subprog_is_global(const struct bpf_verifier_env *env, int subprog)
396 {
397 	struct bpf_func_info_aux *aux = env->prog->aux->func_info_aux;
398 
399 	return aux && aux[subprog].linkage == BTF_FUNC_GLOBAL;
400 }
401 
402 static bool subprog_returns_void(struct bpf_verifier_env *env, int subprog)
403 {
404 	const struct btf_type *type, *func, *func_proto;
405 	const struct btf *btf = env->prog->aux->btf;
406 	u32 btf_id;
407 
408 	btf_id = env->prog->aux->func_info[subprog].type_id;
409 
410 	func = btf_type_by_id(btf, btf_id);
411 	if (verifier_bug_if(!func, env, "btf_id %u not found", btf_id))
412 		return false;
413 
414 	func_proto = btf_type_by_id(btf, func->type);
415 	if (!func_proto)
416 		return false;
417 
418 	type = btf_type_skip_modifiers(btf, func_proto->type, NULL);
419 	if (!type)
420 		return false;
421 
422 	return btf_type_is_void(type);
423 }
424 
425 static const char *subprog_name(const struct bpf_verifier_env *env, int subprog)
426 {
427 	struct bpf_func_info *info;
428 
429 	if (!env->prog->aux->func_info)
430 		return "";
431 
432 	info = &env->prog->aux->func_info[subprog];
433 	return btf_type_name(env->prog->aux->btf, info->type_id);
434 }
435 
436 void bpf_mark_subprog_exc_cb(struct bpf_verifier_env *env, int subprog)
437 {
438 	struct bpf_subprog_info *info = subprog_info(env, subprog);
439 
440 	info->is_cb = true;
441 	info->is_async_cb = true;
442 	info->is_exception_cb = true;
443 }
444 
445 static bool subprog_is_exc_cb(struct bpf_verifier_env *env, int subprog)
446 {
447 	return subprog_info(env, subprog)->is_exception_cb;
448 }
449 
450 static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
451 {
452 	return btf_record_has_field(reg_btf_record(reg), BPF_SPIN_LOCK | BPF_RES_SPIN_LOCK);
453 }
454 
455 static bool type_is_rdonly_mem(u32 type)
456 {
457 	return type & MEM_RDONLY;
458 }
459 
460 static bool is_acquire_function(enum bpf_func_id func_id,
461 				const struct bpf_map *map)
462 {
463 	enum bpf_map_type map_type = map ? map->map_type : BPF_MAP_TYPE_UNSPEC;
464 
465 	if (func_id == BPF_FUNC_sk_lookup_tcp ||
466 	    func_id == BPF_FUNC_sk_lookup_udp ||
467 	    func_id == BPF_FUNC_skc_lookup_tcp ||
468 	    func_id == BPF_FUNC_ringbuf_reserve ||
469 	    func_id == BPF_FUNC_kptr_xchg)
470 		return true;
471 
472 	if (func_id == BPF_FUNC_map_lookup_elem &&
473 	    (map_type == BPF_MAP_TYPE_SOCKMAP ||
474 	     map_type == BPF_MAP_TYPE_SOCKHASH))
475 		return true;
476 
477 	return false;
478 }
479 
480 static bool is_ptr_cast_function(enum bpf_func_id func_id)
481 {
482 	return func_id == BPF_FUNC_tcp_sock ||
483 		func_id == BPF_FUNC_sk_fullsock ||
484 		func_id == BPF_FUNC_skc_to_tcp_sock ||
485 		func_id == BPF_FUNC_skc_to_tcp6_sock ||
486 		func_id == BPF_FUNC_skc_to_udp6_sock ||
487 		func_id == BPF_FUNC_skc_to_mptcp_sock ||
488 		func_id == BPF_FUNC_skc_to_tcp_timewait_sock ||
489 		func_id == BPF_FUNC_skc_to_tcp_request_sock;
490 }
491 
/*
 * Forward declarations of kfunc classification helpers, so that they can be
 * called ahead of the point where they are defined.
 */
static bool is_sync_callback_calling_kfunc(u32 btf_id);
static bool is_async_callback_calling_kfunc(u32 btf_id);
static bool is_callback_calling_kfunc(u32 btf_id);

static bool is_bpf_wq_set_callback_kfunc(u32 btf_id);
static bool is_task_work_add_kfunc(u32 func_id);
498 
499 static bool is_sync_callback_calling_function(enum bpf_func_id func_id)
500 {
501 	return func_id == BPF_FUNC_for_each_map_elem ||
502 	       func_id == BPF_FUNC_find_vma ||
503 	       func_id == BPF_FUNC_loop ||
504 	       func_id == BPF_FUNC_user_ringbuf_drain;
505 }
506 
507 static bool is_async_callback_calling_function(enum bpf_func_id func_id)
508 {
509 	return func_id == BPF_FUNC_timer_set_callback;
510 }
511 
512 static bool is_callback_calling_function(enum bpf_func_id func_id)
513 {
514 	return is_sync_callback_calling_function(func_id) ||
515 	       is_async_callback_calling_function(func_id);
516 }
517 
518 bool bpf_is_sync_callback_calling_insn(struct bpf_insn *insn)
519 {
520 	return (bpf_helper_call(insn) && is_sync_callback_calling_function(insn->imm)) ||
521 	       (bpf_pseudo_kfunc_call(insn) && is_sync_callback_calling_kfunc(insn->imm));
522 }
523 
524 bool bpf_is_async_callback_calling_insn(struct bpf_insn *insn)
525 {
526 	return (bpf_helper_call(insn) && is_async_callback_calling_function(insn->imm)) ||
527 	       (bpf_pseudo_kfunc_call(insn) && is_async_callback_calling_kfunc(insn->imm));
528 }
529 
530 static bool is_async_cb_sleepable(struct bpf_verifier_env *env, struct bpf_insn *insn)
531 {
532 	/* bpf_timer callbacks are never sleepable. */
533 	if (bpf_helper_call(insn) && insn->imm == BPF_FUNC_timer_set_callback)
534 		return false;
535 
536 	/* bpf_wq and bpf_task_work callbacks are always sleepable. */
537 	if (bpf_pseudo_kfunc_call(insn) && insn->off == 0 &&
538 	    (is_bpf_wq_set_callback_kfunc(insn->imm) || is_task_work_add_kfunc(insn->imm)))
539 		return true;
540 
541 	verifier_bug(env, "unhandled async callback in is_async_cb_sleepable");
542 	return false;
543 }
544 
545 bool bpf_is_may_goto_insn(struct bpf_insn *insn)
546 {
547 	return insn->code == (BPF_JMP | BPF_JCOND) && insn->src_reg == BPF_MAY_GOTO;
548 }
549 
550 static bool is_spi_bounds_valid(struct bpf_func_state *state, int spi, int nr_slots)
551 {
552        int allocated_slots = state->allocated_stack / BPF_REG_SIZE;
553 
554        /* We need to check that slots between [spi - nr_slots + 1, spi] are
555 	* within [0, allocated_stack).
556 	*
557 	* Please note that the spi grows downwards. For example, a dynptr
558 	* takes the size of two stack slots; the first slot will be at
559 	* spi and the second slot will be at spi - 1.
560 	*/
561        return spi - nr_slots + 1 >= 0 && spi < allocated_slots;
562 }
563 
564 static int stack_slot_obj_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
565 			          const char *obj_kind, int nr_slots)
566 {
567 	int off, spi;
568 
569 	if (!tnum_is_const(reg->var_off)) {
570 		verbose(env, "%s has to be at a constant offset\n", obj_kind);
571 		return -EINVAL;
572 	}
573 
574 	off = reg->var_off.value;
575 	if (off % BPF_REG_SIZE) {
576 		verbose(env, "cannot pass in %s at an offset=%d\n", obj_kind, off);
577 		return -EINVAL;
578 	}
579 
580 	spi = bpf_get_spi(off);
581 	if (spi + 1 < nr_slots) {
582 		verbose(env, "cannot pass in %s at an offset=%d\n", obj_kind, off);
583 		return -EINVAL;
584 	}
585 
586 	if (!is_spi_bounds_valid(bpf_func(env, reg), spi, nr_slots))
587 		return -ERANGE;
588 	return spi;
589 }
590 
591 static int dynptr_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
592 {
593 	return stack_slot_obj_get_spi(env, reg, "dynptr", BPF_DYNPTR_NR_SLOTS);
594 }
595 
/*
 * Slot index of the @nr_slots-slot iterator that @reg points to, or a
 * negative errno from stack_slot_obj_get_spi().
 */
static int iter_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg, int nr_slots)
{
	int spi = stack_slot_obj_get_spi(env, reg, "iter", nr_slots);

	return spi;
}
600 
/*
 * Slot index of the single-slot irq_flag object that @reg points to, or a
 * negative errno from stack_slot_obj_get_spi().
 */
static int irq_flag_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{
	int spi = stack_slot_obj_get_spi(env, reg, "irq_flag", 1);

	return spi;
}
605 
606 static enum bpf_dynptr_type arg_to_dynptr_type(enum bpf_arg_type arg_type)
607 {
608 	switch (arg_type & DYNPTR_TYPE_FLAG_MASK) {
609 	case DYNPTR_TYPE_LOCAL:
610 		return BPF_DYNPTR_TYPE_LOCAL;
611 	case DYNPTR_TYPE_RINGBUF:
612 		return BPF_DYNPTR_TYPE_RINGBUF;
613 	case DYNPTR_TYPE_SKB:
614 		return BPF_DYNPTR_TYPE_SKB;
615 	case DYNPTR_TYPE_XDP:
616 		return BPF_DYNPTR_TYPE_XDP;
617 	case DYNPTR_TYPE_SKB_META:
618 		return BPF_DYNPTR_TYPE_SKB_META;
619 	case DYNPTR_TYPE_FILE:
620 		return BPF_DYNPTR_TYPE_FILE;
621 	default:
622 		return BPF_DYNPTR_TYPE_INVALID;
623 	}
624 }
625 
626 static enum bpf_type_flag get_dynptr_type_flag(enum bpf_dynptr_type type)
627 {
628 	switch (type) {
629 	case BPF_DYNPTR_TYPE_LOCAL:
630 		return DYNPTR_TYPE_LOCAL;
631 	case BPF_DYNPTR_TYPE_RINGBUF:
632 		return DYNPTR_TYPE_RINGBUF;
633 	case BPF_DYNPTR_TYPE_SKB:
634 		return DYNPTR_TYPE_SKB;
635 	case BPF_DYNPTR_TYPE_XDP:
636 		return DYNPTR_TYPE_XDP;
637 	case BPF_DYNPTR_TYPE_SKB_META:
638 		return DYNPTR_TYPE_SKB_META;
639 	case BPF_DYNPTR_TYPE_FILE:
640 		return DYNPTR_TYPE_FILE;
641 	default:
642 		return 0;
643 	}
644 }
645 
646 static bool dynptr_type_referenced(enum bpf_dynptr_type type)
647 {
648 	return type == BPF_DYNPTR_TYPE_RINGBUF || type == BPF_DYNPTR_TYPE_FILE;
649 }
650 
/* Forward declaration; called by the dynptr marking helpers below. */
static void __mark_dynptr_reg(struct bpf_reg_state *reg,
			      enum bpf_dynptr_type type,
			      bool first_slot, int id, int parent_id);
654 
655 
656 static void mark_dynptr_stack_regs(struct bpf_verifier_env *env,
657 				   struct bpf_reg_state *sreg1,
658 				   struct bpf_reg_state *sreg2,
659 				   enum bpf_dynptr_type type, int parent_id)
660 {
661 	int id = ++env->id_gen;
662 
663 	__mark_dynptr_reg(sreg1, type, true, id, parent_id);
664 	__mark_dynptr_reg(sreg2, type, false, id, parent_id);
665 }
666 
667 static void mark_dynptr_cb_reg(struct bpf_verifier_env *env,
668 			       struct bpf_reg_state *reg,
669 			       enum bpf_dynptr_type type)
670 {
671 	__mark_dynptr_reg(reg, type, true, ++env->id_gen, 0);
672 }
673 
/* Forward declaration; needed by mark_stack_slots_dynptr() below. */
static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env,
				        struct bpf_func_state *state, int spi);
676 
/*
 * Turn the two stack slots addressed by @reg into a dynptr of the type
 * encoded in @arg_type.
 *
 * @ref_obj is consulted only when @dynptr->type is BPF_DYNPTR_TYPE_INVALID
 * (the constructor path); otherwise the parent id is copied from @dynptr
 * (the clone path). For referenced dynptr types (see
 * dynptr_type_referenced()) a new reference is acquired for @insn_idx and
 * its id becomes the parent id of the new dynptr.
 *
 * Returns 0 on success or a negative errno: the error from dynptr_get_spi(),
 * destroy_if_dynptr_stack_slot(), validate_ref_obj() or acquire_reference(),
 * or -EINVAL when @arg_type carries no known dynptr type flag.
 *
 * Note that both slots are already tagged STACK_DYNPTR by the time the
 * later checks can fail.
 */
static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
				   enum bpf_arg_type arg_type, int insn_idx,
				   struct ref_obj_desc *ref_obj, struct bpf_dynptr_desc *dynptr)
{
	struct bpf_func_state *state = bpf_func(env, reg);
	int spi, i, err, parent_id = 0;
	enum bpf_dynptr_type type;

	spi = dynptr_get_spi(env, reg);
	if (spi < 0)
		return spi;

	/* We cannot assume both spi and spi - 1 belong to the same dynptr,
	 * hence we need to call destroy_if_dynptr_stack_slot twice for both,
	 * to ensure that for the following example:
	 *	[d1][d1][d2][d2]
	 * spi    3   2   1   0
	 * So marking spi = 2 should lead to destruction of both d1 and d2. In
	 * case they do belong to same dynptr, second call won't see slot_type
	 * as STACK_DYNPTR and will simply skip destruction.
	 */
	err = destroy_if_dynptr_stack_slot(env, state, spi);
	if (err)
		return err;
	err = destroy_if_dynptr_stack_slot(env, state, spi - 1);
	if (err)
		return err;

	/* Tag every byte of both slots as belonging to a dynptr */
	for (i = 0; i < BPF_REG_SIZE; i++) {
		state->stack[spi].slot_type[i] = STACK_DYNPTR;
		state->stack[spi - 1].slot_type[i] = STACK_DYNPTR;
	}

	type = arg_to_dynptr_type(arg_type);
	if (type == BPF_DYNPTR_TYPE_INVALID)
		return -EINVAL;

	if (dynptr->type == BPF_DYNPTR_TYPE_INVALID) { /* dynptr constructors */
		err = validate_ref_obj(env, ref_obj);
		if (err)
			return err;

		/* Track parent's id if the parent is a referenced object */
		parent_id = ref_obj->id;

		if (dynptr_type_referenced(type)) {
			int id;

			/*
			 * Create an intermediate reference that tracks the referenced
			 * object for the referenced dynptr. Freeing a referenced dynptr
			 * through helpers/kfuncs will invalidate all clones.
			 */
			id = acquire_reference(env, insn_idx, parent_id);
			if (id < 0)
				return id;

			parent_id = id;
		}
	} else { /* bpf_dynptr_clone() */
		parent_id = dynptr->parent_id;
	}

	/* stack[spi] becomes the first slot, stack[spi - 1] the second */
	mark_dynptr_stack_regs(env, &state->stack[spi].spilled_ptr,
			       &state->stack[spi - 1].spilled_ptr, type, parent_id);

	return 0;
}
745 
746 static void invalidate_dynptr(struct bpf_verifier_env *env, struct bpf_stack_state *stack)
747 {
748 	int i;
749 
750 	for (i = 0; i < BPF_REG_SIZE; i++) {
751 		stack[0].slot_type[i] = STACK_INVALID;
752 		stack[1].slot_type[i] = STACK_INVALID;
753 	}
754 
755 	bpf_mark_reg_not_init(env, &stack[0].spilled_ptr);
756 	bpf_mark_reg_not_init(env, &stack[1].spilled_ptr);
757 }
758 
759 static int unmark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
760 {
761 	struct bpf_func_state *state = bpf_func(env, reg);
762 	int spi;
763 
764 	spi = dynptr_get_spi(env, reg);
765 	if (spi < 0)
766 		return spi;
767 
768 	/*
769 	 * For referenced dynptr, release the parent ref which cascades to
770 	 * all clones and derived slices. For non-referenced dynptr, only
771 	 * the dynptr and slices derived from it will be invalidated.
772 	 */
773 	reg = &state->stack[spi].spilled_ptr;
774 	return release_reference(env, dynptr_type_referenced(reg->dynptr.type)
775 				      ? reg->parent_id
776 				      : reg->id);
777 }
778 
/* Forward declaration; needed by mark_reg_invalid() below. */
static void __mark_reg_unknown(const struct bpf_verifier_env *env,
			       struct bpf_reg_state *reg);
781 
782 static void mark_reg_invalid(const struct bpf_verifier_env *env, struct bpf_reg_state *reg)
783 {
784 	if (!env->allow_ptr_leaks)
785 		bpf_mark_reg_not_init(env, reg);
786 	else
787 		__mark_reg_unknown(env, reg);
788 }
789 
/*
 * Count the dynptrs in the current verifier state whose parent_id equals
 * @v_parent_id.
 *
 * Only entries that come with a stack slot whose first byte is tagged
 * STACK_DYNPTR and whose spilled register is marked first_slot are counted,
 * so a dynptr is counted once even though it occupies more than one slot.
 */
static int dynptr_ref_cnt(struct bpf_verifier_env *env, int v_parent_id)
{
	struct bpf_stack_state *stack;
	struct bpf_func_state *state;
	struct bpf_reg_state *reg;
	int ref_cnt = 0;

	bpf_for_each_reg_in_vstate_mask(env->cur_state, state, reg, stack, 1 << STACK_DYNPTR, ({
		/* skip entries without a stack slot and slots that hold no dynptr */
		if (!stack || stack->slot_type[0] != STACK_DYNPTR)
			continue;
		/* only look at the first slot of each dynptr */
		if (!stack->spilled_ptr.dynptr.first_slot)
			continue;
		if (stack->spilled_ptr.parent_id == v_parent_id)
			ref_cnt++;
	}));

	return ref_cnt;
}
808 
809 static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env,
810 				        struct bpf_func_state *state, int spi)
811 {
812 	int err = 0;
813 
814 	/* We always ensure that STACK_DYNPTR is never set partially,
815 	 * hence just checking for slot_type[0] is enough. This is
816 	 * different for STACK_SPILL, where it may be only set for
817 	 * 1 byte, so code has to use is_spilled_reg.
818 	 */
819 	if (state->stack[spi].slot_type[0] != STACK_DYNPTR)
820 		return 0;
821 
822 	/* Reposition spi to first slot */
823 	if (!state->stack[spi].spilled_ptr.dynptr.first_slot)
824 		spi = spi + 1;
825 
826 	/*
827 	 * A referenced dynptr can be overwritten only if there is at
828 	 * least one other dynptr sharing the same virtual ref parent,
829 	 * ensuring the reference can still be properly released.
830 	 */
831 	if (dynptr_type_referenced(state->stack[spi].spilled_ptr.dynptr.type) &&
832 	    dynptr_ref_cnt(env, state->stack[spi].spilled_ptr.parent_id) <= 1) {
833 		verbose(env, "cannot overwrite referenced dynptr\n");
834 		return -EINVAL;
835 	}
836 
837 	/* Invalidate the dynptr and any derived slices */
838 	err = release_reference(env, state->stack[spi].spilled_ptr.id);
839 	if (!err) {
840 		mark_stack_slot_scratched(env, spi);
841 		mark_stack_slot_scratched(env, spi - 1);
842 	}
843 
844 	return err;
845 }
846 
847 static bool is_dynptr_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
848 {
849 	int spi;
850 
851 	if (reg->type == CONST_PTR_TO_DYNPTR)
852 		return false;
853 
854 	spi = dynptr_get_spi(env, reg);
855 
856 	/* -ERANGE (i.e. spi not falling into allocated stack slots) isn't an
857 	 * error because this just means the stack state hasn't been updated yet.
858 	 * We will do check_mem_access to check and update stack bounds later.
859 	 */
860 	if (spi < 0 && spi != -ERANGE)
861 		return false;
862 
863 	/* We don't need to check if the stack slots are marked by previous
864 	 * dynptr initializations because we allow overwriting existing unreferenced
865 	 * STACK_DYNPTR slots, see mark_stack_slots_dynptr which calls
866 	 * destroy_if_dynptr_stack_slot to ensure dynptr objects at the slots we are
867 	 * touching are completely destructed before we reinitialize them for a new
868 	 * one. For referenced ones, destroy_if_dynptr_stack_slot returns an error early
869 	 * instead of delaying it until the end where the user will get "Unreleased
870 	 * reference" error.
871 	 */
872 	return true;
873 }
874 
875 static bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
876 {
877 	struct bpf_func_state *state = bpf_func(env, reg);
878 	int i, spi;
879 
880 	/* This already represents first slot of initialized bpf_dynptr.
881 	 *
882 	 * CONST_PTR_TO_DYNPTR already has fixed and var_off as 0 due to
883 	 * check_func_arg_reg_off's logic, so we don't need to check its
884 	 * offset and alignment.
885 	 */
886 	if (reg->type == CONST_PTR_TO_DYNPTR)
887 		return true;
888 
889 	spi = dynptr_get_spi(env, reg);
890 	if (spi < 0)
891 		return false;
892 	if (!state->stack[spi].spilled_ptr.dynptr.first_slot)
893 		return false;
894 
895 	for (i = 0; i < BPF_REG_SIZE; i++) {
896 		if (state->stack[spi].slot_type[i] != STACK_DYNPTR ||
897 		    state->stack[spi - 1].slot_type[i] != STACK_DYNPTR)
898 			return false;
899 	}
900 
901 	return true;
902 }
903 
904 static bool is_dynptr_type_expected(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
905 				    enum bpf_arg_type arg_type)
906 {
907 	struct bpf_func_state *state = bpf_func(env, reg);
908 	enum bpf_dynptr_type dynptr_type;
909 	int spi;
910 
911 	/* ARG_PTR_TO_DYNPTR takes any type of dynptr */
912 	if (arg_type == ARG_PTR_TO_DYNPTR)
913 		return true;
914 
915 	dynptr_type = arg_to_dynptr_type(arg_type);
916 	if (reg->type == CONST_PTR_TO_DYNPTR) {
917 		return reg->dynptr.type == dynptr_type;
918 	} else {
919 		spi = dynptr_get_spi(env, reg);
920 		if (spi < 0)
921 			return false;
922 		return state->stack[spi].spilled_ptr.dynptr.type == dynptr_type;
923 	}
924 }
925 
926 static void __mark_reg_known_zero(struct bpf_reg_state *reg);
927 
928 static bool in_rcu_cs(struct bpf_verifier_env *env);
929 
930 static bool is_kfunc_rcu_protected(struct bpf_kfunc_call_arg_meta *meta);
931 
932 static int mark_stack_slots_iter(struct bpf_verifier_env *env,
933 				 struct bpf_kfunc_call_arg_meta *meta,
934 				 struct bpf_reg_state *reg, int insn_idx,
935 				 struct btf *btf, u32 btf_id, int nr_slots)
936 {
937 	struct bpf_func_state *state = bpf_func(env, reg);
938 	int spi, i, j, id;
939 
940 	spi = iter_get_spi(env, reg, nr_slots);
941 	if (spi < 0)
942 		return spi;
943 
944 	id = acquire_reference(env, insn_idx, 0);
945 	if (id < 0)
946 		return id;
947 
948 	for (i = 0; i < nr_slots; i++) {
949 		struct bpf_stack_state *slot = &state->stack[spi - i];
950 		struct bpf_reg_state *st = &slot->spilled_ptr;
951 
952 		__mark_reg_known_zero(st);
953 		st->type = PTR_TO_STACK; /* we don't have dedicated reg type */
954 		if (is_kfunc_rcu_protected(meta)) {
955 			if (in_rcu_cs(env))
956 				st->type |= MEM_RCU;
957 			else
958 				st->type |= PTR_UNTRUSTED;
959 		}
960 		st->id = i == 0 ? id : 0;
961 		st->iter.btf = btf;
962 		st->iter.btf_id = btf_id;
963 		st->iter.state = BPF_ITER_STATE_ACTIVE;
964 		st->iter.depth = 0;
965 
966 		for (j = 0; j < BPF_REG_SIZE; j++)
967 			slot->slot_type[j] = STACK_ITER;
968 
969 		mark_stack_slot_scratched(env, spi - i);
970 	}
971 
972 	return 0;
973 }
974 
975 static int unmark_stack_slots_iter(struct bpf_verifier_env *env,
976 				   struct bpf_reg_state *reg, int nr_slots)
977 {
978 	struct bpf_func_state *state = bpf_func(env, reg);
979 	int spi, i, j;
980 
981 	spi = iter_get_spi(env, reg, nr_slots);
982 	if (spi < 0)
983 		return spi;
984 
985 	for (i = 0; i < nr_slots; i++) {
986 		struct bpf_stack_state *slot = &state->stack[spi - i];
987 		struct bpf_reg_state *st = &slot->spilled_ptr;
988 
989 		if (i == 0)
990 			WARN_ON_ONCE(release_reference(env, st->id));
991 
992 		bpf_mark_reg_not_init(env, st);
993 
994 		for (j = 0; j < BPF_REG_SIZE; j++)
995 			slot->slot_type[j] = STACK_INVALID;
996 
997 		mark_stack_slot_scratched(env, spi - i);
998 	}
999 
1000 	return 0;
1001 }
1002 
1003 static bool is_iter_reg_valid_uninit(struct bpf_verifier_env *env,
1004 				     struct bpf_reg_state *reg, int nr_slots)
1005 {
1006 	struct bpf_func_state *state = bpf_func(env, reg);
1007 	int spi, i, j;
1008 
1009 	/* For -ERANGE (i.e. spi not falling into allocated stack slots), we
1010 	 * will do check_mem_access to check and update stack bounds later, so
1011 	 * return true for that case.
1012 	 */
1013 	spi = iter_get_spi(env, reg, nr_slots);
1014 	if (spi == -ERANGE)
1015 		return true;
1016 	if (spi < 0)
1017 		return false;
1018 
1019 	for (i = 0; i < nr_slots; i++) {
1020 		struct bpf_stack_state *slot = &state->stack[spi - i];
1021 
1022 		for (j = 0; j < BPF_REG_SIZE; j++)
1023 			if (slot->slot_type[j] == STACK_ITER)
1024 				return false;
1025 	}
1026 
1027 	return true;
1028 }
1029 
1030 static int is_iter_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
1031 				   struct btf *btf, u32 btf_id, int nr_slots)
1032 {
1033 	struct bpf_func_state *state = bpf_func(env, reg);
1034 	int spi, i, j;
1035 
1036 	spi = iter_get_spi(env, reg, nr_slots);
1037 	if (spi < 0)
1038 		return -EINVAL;
1039 
1040 	for (i = 0; i < nr_slots; i++) {
1041 		struct bpf_stack_state *slot = &state->stack[spi - i];
1042 		struct bpf_reg_state *st = &slot->spilled_ptr;
1043 
1044 		if (st->type & PTR_UNTRUSTED)
1045 			return -EPROTO;
1046 		/* only main (first) slot has id set */
1047 		if (i == 0 && !st->id)
1048 			return -EINVAL;
1049 		if (i != 0 && st->id)
1050 			return -EINVAL;
1051 		if (st->iter.btf != btf || st->iter.btf_id != btf_id)
1052 			return -EINVAL;
1053 
1054 		for (j = 0; j < BPF_REG_SIZE; j++)
1055 			if (slot->slot_type[j] != STACK_ITER)
1056 				return -EINVAL;
1057 	}
1058 
1059 	return 0;
1060 }
1061 
1062 static int acquire_irq_state(struct bpf_verifier_env *env, int insn_idx);
1063 static int release_irq_state(struct bpf_verifier_state *state, int id);
1064 
1065 static int mark_stack_slot_irq_flag(struct bpf_verifier_env *env,
1066 				     struct bpf_kfunc_call_arg_meta *meta,
1067 				     struct bpf_reg_state *reg, int insn_idx,
1068 				     int kfunc_class)
1069 {
1070 	struct bpf_func_state *state = bpf_func(env, reg);
1071 	struct bpf_stack_state *slot;
1072 	struct bpf_reg_state *st;
1073 	int spi, i, id;
1074 
1075 	spi = irq_flag_get_spi(env, reg);
1076 	if (spi < 0)
1077 		return spi;
1078 
1079 	id = acquire_irq_state(env, insn_idx);
1080 	if (id < 0)
1081 		return id;
1082 
1083 	slot = &state->stack[spi];
1084 	st = &slot->spilled_ptr;
1085 
1086 	__mark_reg_known_zero(st);
1087 	st->type = PTR_TO_STACK; /* we don't have dedicated reg type */
1088 	st->id = id;
1089 	st->irq.kfunc_class = kfunc_class;
1090 
1091 	for (i = 0; i < BPF_REG_SIZE; i++)
1092 		slot->slot_type[i] = STACK_IRQ_FLAG;
1093 
1094 	mark_stack_slot_scratched(env, spi);
1095 	return 0;
1096 }
1097 
1098 static int unmark_stack_slot_irq_flag(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
1099 				      int kfunc_class)
1100 {
1101 	struct bpf_func_state *state = bpf_func(env, reg);
1102 	struct bpf_stack_state *slot;
1103 	struct bpf_reg_state *st;
1104 	int spi, i, err;
1105 
1106 	spi = irq_flag_get_spi(env, reg);
1107 	if (spi < 0)
1108 		return spi;
1109 
1110 	slot = &state->stack[spi];
1111 	st = &slot->spilled_ptr;
1112 
1113 	if (st->irq.kfunc_class != kfunc_class) {
1114 		const char *flag_kfunc = st->irq.kfunc_class == IRQ_NATIVE_KFUNC ? "native" : "lock";
1115 		const char *used_kfunc = kfunc_class == IRQ_NATIVE_KFUNC ? "native" : "lock";
1116 
1117 		verbose(env, "irq flag acquired by %s kfuncs cannot be restored with %s kfuncs\n",
1118 			flag_kfunc, used_kfunc);
1119 		return -EINVAL;
1120 	}
1121 
1122 	err = release_irq_state(env->cur_state, st->id);
1123 	WARN_ON_ONCE(err && err != -EACCES);
1124 	if (err) {
1125 		int insn_idx = 0;
1126 
1127 		for (int i = 0; i < env->cur_state->acquired_refs; i++) {
1128 			if (env->cur_state->refs[i].id == env->cur_state->active_irq_id) {
1129 				insn_idx = env->cur_state->refs[i].insn_idx;
1130 				break;
1131 			}
1132 		}
1133 
1134 		verbose(env, "cannot restore irq state out of order, expected id=%d acquired at insn_idx=%d\n",
1135 			env->cur_state->active_irq_id, insn_idx);
1136 		return err;
1137 	}
1138 
1139 	bpf_mark_reg_not_init(env, st);
1140 
1141 	for (i = 0; i < BPF_REG_SIZE; i++)
1142 		slot->slot_type[i] = STACK_INVALID;
1143 
1144 	mark_stack_slot_scratched(env, spi);
1145 	return 0;
1146 }
1147 
1148 static bool is_irq_flag_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
1149 {
1150 	struct bpf_func_state *state = bpf_func(env, reg);
1151 	struct bpf_stack_state *slot;
1152 	int spi, i;
1153 
1154 	/* For -ERANGE (i.e. spi not falling into allocated stack slots), we
1155 	 * will do check_mem_access to check and update stack bounds later, so
1156 	 * return true for that case.
1157 	 */
1158 	spi = irq_flag_get_spi(env, reg);
1159 	if (spi == -ERANGE)
1160 		return true;
1161 	if (spi < 0)
1162 		return false;
1163 
1164 	slot = &state->stack[spi];
1165 
1166 	for (i = 0; i < BPF_REG_SIZE; i++)
1167 		if (slot->slot_type[i] == STACK_IRQ_FLAG)
1168 			return false;
1169 	return true;
1170 }
1171 
1172 static int is_irq_flag_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
1173 {
1174 	struct bpf_func_state *state = bpf_func(env, reg);
1175 	struct bpf_stack_state *slot;
1176 	struct bpf_reg_state *st;
1177 	int spi, i;
1178 
1179 	spi = irq_flag_get_spi(env, reg);
1180 	if (spi < 0)
1181 		return -EINVAL;
1182 
1183 	slot = &state->stack[spi];
1184 	st = &slot->spilled_ptr;
1185 
1186 	if (!st->id)
1187 		return -EINVAL;
1188 
1189 	for (i = 0; i < BPF_REG_SIZE; i++)
1190 		if (slot->slot_type[i] != STACK_IRQ_FLAG)
1191 			return -EINVAL;
1192 	return 0;
1193 }
1194 
1195 /* Check if given stack slot is "special":
1196  *   - spilled register state (STACK_SPILL);
1197  *   - dynptr state (STACK_DYNPTR);
1198  *   - iter state (STACK_ITER).
1199  *   - irq flag state (STACK_IRQ_FLAG)
1200  */
1201 static bool is_stack_slot_special(const struct bpf_stack_state *stack)
1202 {
1203 	enum bpf_stack_slot_type type = stack->slot_type[BPF_REG_SIZE - 1];
1204 
1205 	switch (type) {
1206 	case STACK_SPILL:
1207 	case STACK_DYNPTR:
1208 	case STACK_ITER:
1209 	case STACK_IRQ_FLAG:
1210 		return true;
1211 	case STACK_INVALID:
1212 	case STACK_POISON:
1213 	case STACK_MISC:
1214 	case STACK_ZERO:
1215 		return false;
1216 	default:
1217 		WARN_ONCE(1, "unknown stack slot type %d\n", type);
1218 		return true;
1219 	}
1220 }
1221 
1222 /* The reg state of a pointer or a bounded scalar was saved when
1223  * it was spilled to the stack.
1224  */
1225 
1226 /*
1227  * Mark stack slot as STACK_MISC, unless it is already:
1228  * - STACK_INVALID, in which case they are equivalent.
1229  * - STACK_ZERO, in which case we preserve more precise STACK_ZERO.
1230  * - STACK_POISON, which truly forbids access to the slot.
1231  * Regardless of allow_ptr_leaks setting (i.e., privileged or unprivileged
1232  * mode), we won't promote STACK_INVALID to STACK_MISC. In privileged case it is
1233  * unnecessary as both are considered equivalent when loading data and pruning,
1234  * in case of unprivileged mode it will be incorrect to allow reads of invalid
1235  * slots.
1236  */
1237 static void mark_stack_slot_misc(struct bpf_verifier_env *env, u8 *stype)
1238 {
1239 	if (*stype == STACK_ZERO)
1240 		return;
1241 	if (*stype == STACK_INVALID || *stype == STACK_POISON)
1242 		return;
1243 	*stype = STACK_MISC;
1244 }
1245 
1246 static void scrub_spilled_slot(u8 *stype)
1247 {
1248 	if (*stype != STACK_INVALID && *stype != STACK_POISON)
1249 		*stype = STACK_MISC;
1250 }
1251 
1252 /* copy array src of length n * size bytes to dst. dst is reallocated if it's too
1253  * small to hold src. This is different from krealloc since we don't want to preserve
1254  * the contents of dst.
1255  *
1256  * Leaves dst untouched if src is NULL or length is zero. Returns NULL if memory could
1257  * not be allocated.
1258  */
1259 static void *copy_array(void *dst, const void *src, size_t n, size_t size, gfp_t flags)
1260 {
1261 	size_t alloc_bytes;
1262 	void *orig = dst;
1263 	size_t bytes;
1264 
1265 	if (ZERO_OR_NULL_PTR(src))
1266 		goto out;
1267 
1268 	if (unlikely(check_mul_overflow(n, size, &bytes)))
1269 		return NULL;
1270 
1271 	alloc_bytes = max(ksize(orig), kmalloc_size_roundup(bytes));
1272 	dst = krealloc(orig, alloc_bytes, flags);
1273 	if (!dst) {
1274 		kfree(orig);
1275 		return NULL;
1276 	}
1277 
1278 	memcpy(dst, src, bytes);
1279 out:
1280 	return dst ? dst : ZERO_SIZE_PTR;
1281 }
1282 
1283 /* resize an array from old_n items to new_n items. the array is reallocated if it's too
1284  * small to hold new_n items. new items are zeroed out if the array grows.
1285  *
1286  * Contrary to krealloc_array, does not free arr if new_n is zero.
1287  */
1288 static void *realloc_array(void *arr, size_t old_n, size_t new_n, size_t size)
1289 {
1290 	size_t alloc_size;
1291 	void *new_arr;
1292 
1293 	if (!new_n || old_n == new_n)
1294 		goto out;
1295 
1296 	alloc_size = kmalloc_size_roundup(size_mul(new_n, size));
1297 	new_arr = krealloc(arr, alloc_size, GFP_KERNEL_ACCOUNT);
1298 	if (!new_arr) {
1299 		kfree(arr);
1300 		return NULL;
1301 	}
1302 	arr = new_arr;
1303 
1304 	if (new_n > old_n)
1305 		memset(arr + old_n * size, 0, (new_n - old_n) * size);
1306 
1307 out:
1308 	return arr ? arr : ZERO_SIZE_PTR;
1309 }
1310 
1311 static int copy_reference_state(struct bpf_verifier_state *dst, const struct bpf_verifier_state *src)
1312 {
1313 	dst->refs = copy_array(dst->refs, src->refs, src->acquired_refs,
1314 			       sizeof(struct bpf_reference_state), GFP_KERNEL_ACCOUNT);
1315 	if (!dst->refs)
1316 		return -ENOMEM;
1317 
1318 	dst->acquired_refs = src->acquired_refs;
1319 	dst->active_locks = src->active_locks;
1320 	dst->active_preempt_locks = src->active_preempt_locks;
1321 	dst->active_rcu_locks = src->active_rcu_locks;
1322 	dst->active_irq_id = src->active_irq_id;
1323 	dst->active_lock_id = src->active_lock_id;
1324 	dst->active_lock_ptr = src->active_lock_ptr;
1325 	return 0;
1326 }
1327 
1328 static int copy_stack_state(struct bpf_func_state *dst, const struct bpf_func_state *src)
1329 {
1330 	size_t n = src->allocated_stack / BPF_REG_SIZE;
1331 
1332 	dst->stack = copy_array(dst->stack, src->stack, n, sizeof(struct bpf_stack_state),
1333 				GFP_KERNEL_ACCOUNT);
1334 	if (!dst->stack)
1335 		return -ENOMEM;
1336 
1337 	dst->allocated_stack = src->allocated_stack;
1338 
1339 	/* copy stack args state */
1340 	n = src->out_stack_arg_cnt;
1341 	if (n) {
1342 		dst->stack_arg_regs = copy_array(dst->stack_arg_regs, src->stack_arg_regs, n,
1343 						 sizeof(struct bpf_reg_state),
1344 						 GFP_KERNEL_ACCOUNT);
1345 		if (!dst->stack_arg_regs)
1346 			return -ENOMEM;
1347 	}
1348 
1349 	dst->out_stack_arg_cnt = src->out_stack_arg_cnt;
1350 	return 0;
1351 }
1352 
1353 static int resize_reference_state(struct bpf_verifier_state *state, size_t n)
1354 {
1355 	state->refs = realloc_array(state->refs, state->acquired_refs, n,
1356 				    sizeof(struct bpf_reference_state));
1357 	if (!state->refs)
1358 		return -ENOMEM;
1359 
1360 	state->acquired_refs = n;
1361 	return 0;
1362 }
1363 
1364 /* Possibly update state->allocated_stack to be at least size bytes. Also
1365  * possibly update the function's high-water mark in its bpf_subprog_info.
1366  */
1367 static int grow_stack_state(struct bpf_verifier_env *env, struct bpf_func_state *state, int size)
1368 {
1369 	size_t old_n = state->allocated_stack / BPF_REG_SIZE, n;
1370 
1371 	/* The stack size is always a multiple of BPF_REG_SIZE. */
1372 	size = round_up(size, BPF_REG_SIZE);
1373 	n = size / BPF_REG_SIZE;
1374 
1375 	if (old_n >= n)
1376 		return 0;
1377 
1378 	state->stack = realloc_array(state->stack, old_n, n, sizeof(struct bpf_stack_state));
1379 	if (!state->stack)
1380 		return -ENOMEM;
1381 
1382 	state->allocated_stack = size;
1383 
1384 	/* update known max for given subprogram */
1385 	if (env->subprog_info[state->subprogno].stack_depth < size)
1386 		env->subprog_info[state->subprogno].stack_depth = size;
1387 
1388 	return 0;
1389 }
1390 
1391 static int grow_stack_arg_slots(struct bpf_verifier_env *env,
1392 				struct bpf_func_state *state, int cnt)
1393 {
1394 	size_t old_n = state->out_stack_arg_cnt;
1395 
1396 	if (old_n >= cnt)
1397 		return 0;
1398 
1399 	state->stack_arg_regs = realloc_array(state->stack_arg_regs, old_n, cnt,
1400 					      sizeof(struct bpf_reg_state));
1401 	if (!state->stack_arg_regs)
1402 		return -ENOMEM;
1403 
1404 	state->out_stack_arg_cnt = cnt;
1405 	return 0;
1406 }
1407 
1408 /* Acquire a pointer id from the env and update the state->refs to include
1409  * this new pointer reference.
1410  * On success, returns a valid pointer id to associate with the register
1411  * On failure, returns a negative errno.
1412  */
1413 static struct bpf_reference_state *acquire_reference_state(struct bpf_verifier_env *env, int insn_idx)
1414 {
1415 	struct bpf_verifier_state *state = env->cur_state;
1416 	int new_ofs = state->acquired_refs;
1417 	int err;
1418 
1419 	err = resize_reference_state(state, state->acquired_refs + 1);
1420 	if (err)
1421 		return NULL;
1422 	state->refs[new_ofs].insn_idx = insn_idx;
1423 
1424 	return &state->refs[new_ofs];
1425 }
1426 
1427 static int acquire_reference(struct bpf_verifier_env *env, int insn_idx, int parent_id)
1428 {
1429 	struct bpf_reference_state *s;
1430 
1431 	s = acquire_reference_state(env, insn_idx);
1432 	if (!s)
1433 		return -ENOMEM;
1434 	s->type = REF_TYPE_PTR;
1435 	s->id = ++env->id_gen;
1436 	s->parent_id = parent_id;
1437 	return s->id;
1438 }
1439 
1440 static int acquire_lock_state(struct bpf_verifier_env *env, int insn_idx, enum ref_state_type type,
1441 			      int id, void *ptr)
1442 {
1443 	struct bpf_verifier_state *state = env->cur_state;
1444 	struct bpf_reference_state *s;
1445 
1446 	s = acquire_reference_state(env, insn_idx);
1447 	if (!s)
1448 		return -ENOMEM;
1449 	s->type = type;
1450 	s->id = id;
1451 	s->ptr = ptr;
1452 
1453 	state->active_locks++;
1454 	state->active_lock_id = id;
1455 	state->active_lock_ptr = ptr;
1456 	return 0;
1457 }
1458 
1459 static int acquire_irq_state(struct bpf_verifier_env *env, int insn_idx)
1460 {
1461 	struct bpf_verifier_state *state = env->cur_state;
1462 	struct bpf_reference_state *s;
1463 
1464 	s = acquire_reference_state(env, insn_idx);
1465 	if (!s)
1466 		return -ENOMEM;
1467 	s->type = REF_TYPE_IRQ;
1468 	s->id = ++env->id_gen;
1469 
1470 	state->active_irq_id = s->id;
1471 	return s->id;
1472 }
1473 
1474 static void release_reference_state(struct bpf_verifier_state *state, int idx)
1475 {
1476 	int last_idx;
1477 	size_t rem;
1478 
1479 	/* IRQ state requires the relative ordering of elements remaining the
1480 	 * same, since it relies on the refs array to behave as a stack, so that
1481 	 * it can detect out-of-order IRQ restore. Hence use memmove to shift
1482 	 * the array instead of swapping the final element into the deleted idx.
1483 	 */
1484 	last_idx = state->acquired_refs - 1;
1485 	rem = state->acquired_refs - idx - 1;
1486 	if (last_idx && idx != last_idx)
1487 		memmove(&state->refs[idx], &state->refs[idx + 1], sizeof(*state->refs) * rem);
1488 	memset(&state->refs[last_idx], 0, sizeof(*state->refs));
1489 	state->acquired_refs--;
1490 	return;
1491 }
1492 
1493 static bool find_reference_state(struct bpf_verifier_state *state, int id)
1494 {
1495 	int i;
1496 
1497 	for (i = 0; i < state->acquired_refs; i++) {
1498 		if (state->refs[i].type != REF_TYPE_PTR)
1499 			continue;
1500 		if (state->refs[i].id == id)
1501 			return true;
1502 	}
1503 
1504 	return false;
1505 }
1506 
/* Return true if the current verifier state holds a reference whose id
 * equals reg->id, as determined by find_reference_state().
 */
static bool reg_is_referenced(struct bpf_verifier_env *env, const struct bpf_reg_state *reg)
{
	return find_reference_state(env->cur_state, reg->id);
}
1511 
1512 static int release_lock_state(struct bpf_verifier_state *state, int type, int id, void *ptr)
1513 {
1514 	void *prev_ptr = NULL;
1515 	u32 prev_id = 0;
1516 	int i;
1517 
1518 	for (i = 0; i < state->acquired_refs; i++) {
1519 		if (state->refs[i].type == type && state->refs[i].id == id &&
1520 		    state->refs[i].ptr == ptr) {
1521 			release_reference_state(state, i);
1522 			state->active_locks--;
1523 			/* Reassign active lock (id, ptr). */
1524 			state->active_lock_id = prev_id;
1525 			state->active_lock_ptr = prev_ptr;
1526 			return 0;
1527 		}
1528 		if (state->refs[i].type & REF_TYPE_LOCK_MASK) {
1529 			prev_id = state->refs[i].id;
1530 			prev_ptr = state->refs[i].ptr;
1531 		}
1532 	}
1533 	return -EINVAL;
1534 }
1535 
1536 static int release_irq_state(struct bpf_verifier_state *state, int id)
1537 {
1538 	u32 prev_id = 0;
1539 	int i;
1540 
1541 	if (id != state->active_irq_id)
1542 		return -EACCES;
1543 
1544 	for (i = 0; i < state->acquired_refs; i++) {
1545 		if (state->refs[i].type != REF_TYPE_IRQ)
1546 			continue;
1547 		if (state->refs[i].id == id) {
1548 			release_reference_state(state, i);
1549 			state->active_irq_id = prev_id;
1550 			return 0;
1551 		} else {
1552 			prev_id = state->refs[i].id;
1553 		}
1554 	}
1555 	return -EINVAL;
1556 }
1557 
1558 static struct bpf_reference_state *find_lock_state(struct bpf_verifier_state *state, enum ref_state_type type,
1559 						   int id, void *ptr)
1560 {
1561 	int i;
1562 
1563 	for (i = 0; i < state->acquired_refs; i++) {
1564 		struct bpf_reference_state *s = &state->refs[i];
1565 
1566 		if (!(s->type & type))
1567 			continue;
1568 
1569 		if (s->id == id && s->ptr == ptr)
1570 			return s;
1571 	}
1572 	return NULL;
1573 }
1574 
1575 static void free_func_state(struct bpf_func_state *state)
1576 {
1577 	if (!state)
1578 		return;
1579 	kfree(state->stack_arg_regs);
1580 	kfree(state->stack);
1581 	kfree(state);
1582 }
1583 
/* Free the jump history of @state and reset it to empty (NULL pointer and a
 * count of zero), so the state can be reused or freed safely afterwards.
 */
void bpf_clear_jmp_history(struct bpf_verifier_state *state)
{
	kfree(state->jmp_history);
	state->jmp_history = NULL;
	state->jmp_history_cnt = 0;
}
1590 
1591 void bpf_free_verifier_state(struct bpf_verifier_state *state,
1592 			    bool free_self)
1593 {
1594 	int i;
1595 
1596 	for (i = 0; i <= state->curframe; i++) {
1597 		free_func_state(state->frame[i]);
1598 		state->frame[i] = NULL;
1599 	}
1600 	kfree(state->refs);
1601 	bpf_clear_jmp_history(state);
1602 	if (free_self)
1603 		kfree(state);
1604 }
1605 
/* copy function frame state from src to dst growing dst stack space
 * when necessary to accommodate larger src stack
 *
 * Every member of struct bpf_func_state that precedes 'stack' is copied
 * byte-for-byte; 'stack' and the state handled by copy_stack_state() are
 * duplicated separately so that dst keeps its own allocations.
 *
 * Returns 0 on success or the error from copy_stack_state().
 */
static int copy_func_state(struct bpf_func_state *dst,
			   const struct bpf_func_state *src)
{
	/* plain members only: stop right before the 'stack' pointer */
	memcpy(dst, src, offsetof(struct bpf_func_state, stack));
	return copy_stack_state(dst, src);
}
1615 
/* Make @dst_state a deep copy of @src: jump history, reference state, the
 * scalar bookkeeping fields and every function frame up to src->curframe.
 * Existing allocations of @dst_state are reused where possible and frames it
 * has beyond src->curframe are freed.
 *
 * Returns 0 on success or a negative error (-ENOMEM in the paths visible
 * here). On failure @dst_state may be left partially updated.
 */
int bpf_copy_verifier_state(struct bpf_verifier_state *dst_state,
			   const struct bpf_verifier_state *src)
{
	struct bpf_func_state *dst;
	int i, err;

	dst_state->jmp_history = copy_array(dst_state->jmp_history, src->jmp_history,
					  src->jmp_history_cnt, sizeof(*dst_state->jmp_history),
					  GFP_KERNEL_ACCOUNT);
	if (!dst_state->jmp_history)
		return -ENOMEM;
	dst_state->jmp_history_cnt = src->jmp_history_cnt;

	/* if dst has more stack frames than src, free them; this is also
	 * necessary in case of exceptional exits using bpf_throw.
	 */
	for (i = src->curframe + 1; i <= dst_state->curframe; i++) {
		free_func_state(dst_state->frame[i]);
		dst_state->frame[i] = NULL;
	}
	err = copy_reference_state(dst_state, src);
	if (err)
		return err;
	dst_state->speculative = src->speculative;
	dst_state->in_sleepable = src->in_sleepable;
	dst_state->curframe = src->curframe;
	dst_state->branches = src->branches;
	dst_state->parent = src->parent;
	dst_state->first_insn_idx = src->first_insn_idx;
	dst_state->last_insn_idx = src->last_insn_idx;
	dst_state->dfs_depth = src->dfs_depth;
	dst_state->callback_unroll_depth = src->callback_unroll_depth;
	dst_state->may_goto_depth = src->may_goto_depth;
	dst_state->equal_state = src->equal_state;
	for (i = 0; i <= src->curframe; i++) {
		dst = dst_state->frame[i];
		if (!dst) {
			/* dst had no frame at this depth yet: allocate one */
			dst = kzalloc_obj(*dst, GFP_KERNEL_ACCOUNT);
			if (!dst)
				return -ENOMEM;
			dst_state->frame[i] = dst;
		}
		err = copy_func_state(dst, src->frame[i]);
		if (err)
			return err;
	}
	return 0;
}
1664 
/* Number of buckets used when indexing env->explored_states: one per
 * instruction of the program being verified.
 */
static u32 state_htab_size(struct bpf_verifier_env *env)
{
	return env->prog->len;
}
1669 
/* Return the explored-states list head for instruction index @idx. The
 * bucket is selected by XOR-ing @idx with the callsite of the current
 * (innermost) frame, modulo state_htab_size().
 */
struct list_head *bpf_explored_state(struct bpf_verifier_env *env, int idx)
{
	struct bpf_verifier_state *cur = env->cur_state;
	struct bpf_func_state *state = cur->frame[cur->curframe];

	return &env->explored_states[(idx ^ state->callsite) % state_htab_size(env)];
}
1677 
1678 static bool same_callsites(struct bpf_verifier_state *a, struct bpf_verifier_state *b)
1679 {
1680 	int fr;
1681 
1682 	if (a->curframe != b->curframe)
1683 		return false;
1684 
1685 	for (fr = a->curframe; fr >= 0; fr--)
1686 		if (a->frame[fr]->callsite != b->frame[fr]->callsite)
1687 			return false;
1688 
1689 	return true;
1690 }
1691 
1692 
1693 void bpf_free_backedges(struct bpf_scc_visit *visit)
1694 {
1695 	struct bpf_scc_backedge *backedge, *next;
1696 
1697 	for (backedge = visit->backedges; backedge; backedge = next) {
1698 		bpf_free_verifier_state(&backedge->state, false);
1699 		next = backedge->next;
1700 		kfree(backedge);
1701 	}
1702 	visit->backedges = NULL;
1703 }
1704 
1705 static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
1706 		     int *insn_idx, bool pop_log)
1707 {
1708 	struct bpf_verifier_state *cur = env->cur_state;
1709 	struct bpf_verifier_stack_elem *elem, *head = env->head;
1710 	int err;
1711 
1712 	if (env->head == NULL)
1713 		return -ENOENT;
1714 
1715 	if (cur) {
1716 		err = bpf_copy_verifier_state(cur, &head->st);
1717 		if (err)
1718 			return err;
1719 	}
1720 	if (pop_log)
1721 		bpf_vlog_reset(&env->log, head->log_pos);
1722 	if (insn_idx)
1723 		*insn_idx = head->insn_idx;
1724 	if (prev_insn_idx)
1725 		*prev_insn_idx = head->prev_insn_idx;
1726 	elem = head->next;
1727 	bpf_free_verifier_state(&head->st, false);
1728 	kfree(head);
1729 	env->head = elem;
1730 	env->stack_size--;
1731 	return 0;
1732 }
1733 
/* Return true for the non-fatal errors that are allowed to occur during
 * speculative verification: -EPERM, -EACCES and -EINVAL. For these a nospec
 * can be inserted and the program might still be accepted.
 *
 * Something like -ENOMEM is deliberately not included because it is likely
 * to re-occur for the next architectural path once it has been
 * recovered-from in all speculative paths.
 */
static bool error_recoverable_with_nospec(int err)
{
	switch (err) {
	case -EPERM:
	case -EACCES:
	case -EINVAL:
		return true;
	default:
		return false;
	}
}
1745 
/* Push a copy of the current verifier state onto the pending-state stack so
 * that verification can later resume at @insn_idx (having come from
 * @prev_insn_idx). The pushed state is marked speculative if @speculative is
 * set or the current state already is.
 *
 * Returns a pointer to the pushed state, or an ERR_PTR():
 *   -ENOMEM if the element or the state copy could not be allocated,
 *   -E2BIG  if the stack now exceeds BPF_COMPLEXITY_LIMIT_JMP_SEQ entries.
 */
static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
					     int insn_idx, int prev_insn_idx,
					     bool speculative)
{
	struct bpf_verifier_state *cur = env->cur_state;
	struct bpf_verifier_stack_elem *elem;
	int err;

	elem = kzalloc_obj(struct bpf_verifier_stack_elem, GFP_KERNEL_ACCOUNT);
	if (!elem)
		return ERR_PTR(-ENOMEM);

	elem->insn_idx = insn_idx;
	elem->prev_insn_idx = prev_insn_idx;
	elem->next = env->head;
	elem->log_pos = env->log.end_pos;
	/* The element is linked in before the state copy, so on the error
	 * returns below it stays on env->head rather than being freed here.
	 * NOTE(review): presumably released when the stack is torn down by
	 * the caller - confirm.
	 */
	env->head = elem;
	env->stack_size++;
	err = bpf_copy_verifier_state(&elem->st, cur);
	if (err)
		return ERR_PTR(-ENOMEM);
	elem->st.speculative |= speculative;
	if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
		verbose(env, "The sequence of %d jumps is too complex.\n",
			env->stack_size);
		return ERR_PTR(-E2BIG);
	}
	if (elem->st.parent) {
		/* one more state descends from the same parent */
		++elem->st.parent->branches;
		/* WARN_ON(branches > 2) technically makes sense here,
		 * but
		 * 1. speculative states will bump 'branches' for non-branch
		 * instructions
		 * 2. is_state_visited() heuristics may decide not to create
		 * a new state for a sequence of branches and all such current
		 * and cloned states will be pointing to a single parent state
		 * which might have large 'branches' count.
		 */
	}
	return &elem->st;
}
1787 
1788 static const char *reg_arg_name(struct bpf_verifier_env *env, argno_t argno)
1789 {
1790 	char *buf = env->tmp_arg_name;
1791 	int len = sizeof(env->tmp_arg_name);
1792 	int arg, regno = reg_from_argno(argno);
1793 
1794 	if (regno >= 0) {
1795 		snprintf(buf, len, "R%d", regno);
1796 	} else {
1797 		arg = arg_from_argno(argno);
1798 		snprintf(buf, len, "*(R11-%u)", (arg - MAX_BPF_FUNC_REG_ARGS) * BPF_REG_SIZE);
1799 	}
1800 
1801 	return buf;
1802 }
1803 
/* The CALLER_SAVED_REGS caller-saved BPF registers: R0 through R5, in
 * ascending order.
 */
static const int caller_saved[CALLER_SAVED_REGS] = {
	BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
};
1807 
/* Set the value-tracking fields of @reg to the constant @imm: var_off
 * becomes the constant tnum, and the 64-bit and 32-bit ranges are built from
 * an unsigned range whose two bounds are both @imm (truncated to u32 for the
 * 32-bit range).
 *
 * This helper doesn't clear reg->id
 */
static void ___mark_reg_known(struct bpf_reg_state *reg, u64 imm)
{
	reg->var_off = tnum_const(imm);
	reg->r64 = cnum64_from_urange(imm, imm);
	reg->r32 = cnum32_from_urange((u32)imm, (u32)imm);
}
1815 
/* Mark the unknown part of a register (variable offset or scalar value) as
 * known to have the value @imm.
 *
 * reg->type is preserved. Everything between the end of reg->type and the
 * start of reg->var_off is zeroed, and id and parent_id are reset to 0.
 */
static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm)
{
	/* Clear off and union(map_ptr, range) */
	/* The byte range starts right after 'type' (assumed to be the first
	 * member, since sizeof(reg->type) is used as its end offset) and ends
	 * just before 'var_off'.
	 */
	memset(((u8 *)reg) + sizeof(reg->type), 0,
	       offsetof(struct bpf_reg_state, var_off) - sizeof(reg->type));
	reg->id = 0;
	reg->parent_id = 0;
	___mark_reg_known(reg, imm);
}
1828 
/* Mark only the 32-bit subregister of @reg as known to hold @imm: var_off is
 * updated through tnum_const_subreg() and the 32-bit range is built from an
 * unsigned range whose two bounds are both (u32)@imm. The 64-bit range (r64)
 * is not touched here.
 */
static void __mark_reg32_known(struct bpf_reg_state *reg, u64 imm)
{
	reg->var_off = tnum_const_subreg(reg->var_off, imm);
	reg->r32 = cnum32_from_urange((u32)imm, (u32)imm);
}
1834 
/* Mark the 'variable offset' part of a register as zero.  This should be
 * used only on registers holding a pointer type.
 *
 * Implemented as __mark_reg_known(reg, 0), so reg->type is kept while id and
 * parent_id are reset to 0.
 */
static void __mark_reg_known_zero(struct bpf_reg_state *reg)
{
	__mark_reg_known(reg, 0);
}
1842 
/* Turn @reg into a SCALAR_VALUE known to be zero. Its 'precise' flag is set
 * when env->bpf_capable is false and cleared otherwise.
 */
static void __mark_reg_const_zero(const struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{
	__mark_reg_known(reg, 0);
	reg->type = SCALAR_VALUE;
	/* all scalars are assumed imprecise initially (unless unprivileged,
	 * in which case everything is forced to be precise)
	 */
	reg->precise = !env->bpf_capable;
}
1852 
1853 static void mark_reg_known_zero(struct bpf_verifier_env *env,
1854 				struct bpf_reg_state *regs, u32 regno)
1855 {
1856 	__mark_reg_known_zero(regs + regno);
1857 }
1858 
1859 static void __mark_dynptr_reg(struct bpf_reg_state *reg, enum bpf_dynptr_type type,
1860 			      bool first_slot, int id, int parent_id)
1861 {
1862 	/* reg->type has no meaning for STACK_DYNPTR, but when we set reg for
1863 	 * callback arguments, it does need to be CONST_PTR_TO_DYNPTR, so simply
1864 	 * set it unconditionally as it is ignored for STACK_DYNPTR anyway.
1865 	 */
1866 	__mark_reg_known_zero(reg);
1867 	reg->type = CONST_PTR_TO_DYNPTR;
1868 	/* Give each dynptr a unique id to uniquely associate slices to it. */
1869 	reg->id = id;
1870 	reg->parent_id = parent_id;
1871 	reg->dynptr.type = type;
1872 	reg->dynptr.first_slot = first_slot;
1873 }
1874 
1875 static void mark_ptr_not_null_reg(struct bpf_reg_state *reg)
1876 {
1877 	if (base_type(reg->type) == PTR_TO_MAP_VALUE) {
1878 		const struct bpf_map *map = reg->map_ptr;
1879 
1880 		if (map->inner_map_meta) {
1881 			reg->type = CONST_PTR_TO_MAP;
1882 			reg->map_ptr = map->inner_map_meta;
1883 			/* transfer reg's id which is unique for every map_lookup_elem
1884 			 * as UID of the inner map.
1885 			 */
1886 			if (btf_record_has_field(map->inner_map_meta->record,
1887 						 BPF_TIMER | BPF_WORKQUEUE | BPF_TASK_WORK)) {
1888 				reg->map_uid = reg->id;
1889 			}
1890 		} else if (map->map_type == BPF_MAP_TYPE_XSKMAP) {
1891 			reg->type = PTR_TO_XDP_SOCK;
1892 		} else if (map->map_type == BPF_MAP_TYPE_SOCKMAP ||
1893 			   map->map_type == BPF_MAP_TYPE_SOCKHASH) {
1894 			reg->type = PTR_TO_SOCKET;
1895 		} else {
1896 			reg->type = PTR_TO_MAP_VALUE;
1897 		}
1898 		return;
1899 	}
1900 
1901 	reg->type &= ~PTR_MAYBE_NULL;
1902 }
1903 
1904 static void mark_reg_graph_node(struct bpf_reg_state *regs, u32 regno,
1905 				struct btf_field_graph_root *ds_head)
1906 {
1907 	__mark_reg_known(&regs[regno], ds_head->node_offset);
1908 	regs[regno].type = PTR_TO_BTF_ID | MEM_ALLOC;
1909 	regs[regno].btf = ds_head->btf;
1910 	regs[regno].btf_id = ds_head->value_btf_id;
1911 }
1912 
/* Return true if the register's type is a packet pointer type according to
 * type_is_pkt_pointer().
 */
static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg)
{
	return type_is_pkt_pointer(reg->type);
}
1917 
1918 static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg)
1919 {
1920 	return reg_is_pkt_pointer(reg) ||
1921 	       reg->type == PTR_TO_PACKET_END;
1922 }
1923 
1924 static bool reg_is_dynptr_slice_pkt(const struct bpf_reg_state *reg)
1925 {
1926 	return base_type(reg->type) == PTR_TO_MEM &&
1927 	       (reg->type &
1928 		(DYNPTR_TYPE_SKB | DYNPTR_TYPE_XDP | DYNPTR_TYPE_SKB_META));
1929 }
1930 
1931 /* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. */
1932 static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg,
1933 				    enum bpf_reg_type which)
1934 {
1935 	/* The register can already have a range from prior markings.
1936 	 * This is fine as long as it hasn't been advanced from its
1937 	 * origin.
1938 	 */
1939 	return reg->type == which &&
1940 	       reg->id == 0 &&
1941 	       tnum_equals_const(reg->var_off, 0);
1942 }
1943 
1944 static void __mark_reg32_unbounded(struct bpf_reg_state *reg)
1945 {
1946 	reg->r32 = CNUM32_UNBOUNDED;
1947 }
1948 
1949 static void __mark_reg64_unbounded(struct bpf_reg_state *reg)
1950 {
1951 	reg->r64 = CNUM64_UNBOUNDED;
1952 }
1953 
1954 /* Reset the min/max bounds of a register */
1955 static void __mark_reg_unbounded(struct bpf_reg_state *reg)
1956 {
1957 	__mark_reg64_unbounded(reg);
1958 	__mark_reg32_unbounded(reg);
1959 }
1960 
1961 static void reset_reg64_and_tnum(struct bpf_reg_state *reg)
1962 {
1963 	__mark_reg64_unbounded(reg);
1964 	reg->var_off = tnum_unknown;
1965 }
1966 
1967 static void reset_reg32_and_tnum(struct bpf_reg_state *reg)
1968 {
1969 	__mark_reg32_unbounded(reg);
1970 	reg->var_off = tnum_unknown;
1971 }
1972 
1973 static struct cnum32 cnum32_from_tnum(struct tnum tnum)
1974 {
1975 	tnum = tnum_subreg(tnum);
1976 	if ((tnum.mask & S32_MIN) || (tnum.value & S32_MIN))
1977 		/* min signed is max(sign bit) | min(other bits) */
1978 		/* max signed is min(sign bit) | max(other bits) */
1979 		return cnum32_from_srange(tnum.value | (tnum.mask & S32_MIN),
1980 					  tnum.value | (tnum.mask & S32_MAX));
1981 	else
1982 		return cnum32_from_urange(tnum.value, (tnum.value | tnum.mask));
1983 }
1984 
1985 static struct cnum64 cnum64_from_tnum(struct tnum tnum)
1986 {
1987 	if ((tnum.mask & S64_MIN) || (tnum.value & S64_MIN))
1988 		/* min signed is max(sign bit) | min(other bits) */
1989 		/* max signed is min(sign bit) | max(other bits) */
1990 		return cnum64_from_srange(tnum.value | (tnum.mask & S64_MIN),
1991 					  tnum.value | (tnum.mask & S64_MAX));
1992 	else
1993 		return cnum64_from_urange(tnum.value, (tnum.value | tnum.mask));
1994 }
1995 
1996 static void __update_reg32_bounds(struct bpf_reg_state *reg)
1997 {
1998 	cnum32_intersect_with(&reg->r32, cnum32_from_tnum(reg->var_off));
1999 }
2000 
2001 static void __update_reg64_bounds(struct bpf_reg_state *reg)
2002 {
2003 	u64 tnum_next, tmax;
2004 	bool umin_in_tnum;
2005 
2006 	cnum64_intersect_with(&reg->r64, cnum64_from_tnum(reg->var_off));
2007 
2008 	/* Check if u64 and tnum overlap in a single value */
2009 	tnum_next = tnum_step(reg->var_off, reg_umin(reg));
2010 	umin_in_tnum = (reg_umin(reg) & ~reg->var_off.mask) == reg->var_off.value;
2011 	tmax = reg->var_off.value | reg->var_off.mask;
2012 	if (umin_in_tnum && tnum_next > reg_umax(reg)) {
2013 		/* The u64 range and the tnum only overlap in umin.
2014 		 * u64:  ---[xxxxxx]-----
2015 		 * tnum: --xx----------x-
2016 		 */
2017 		___mark_reg_known(reg, reg_umin(reg));
2018 	} else if (!umin_in_tnum && tnum_next == tmax) {
2019 		/* The u64 range and the tnum only overlap in the maximum value
2020 		 * represented by the tnum, called tmax.
2021 		 * u64:  ---[xxxxxx]-----
2022 		 * tnum: xx-----x--------
2023 		 */
2024 		___mark_reg_known(reg, tmax);
2025 	} else if (!umin_in_tnum && tnum_next <= reg_umax(reg) &&
2026 		   tnum_step(reg->var_off, tnum_next) > reg_umax(reg)) {
2027 		/* The u64 range and the tnum only overlap in between umin
2028 		 * (excluded) and umax.
2029 		 * u64:  ---[xxxxxx]-----
2030 		 * tnum: xx----x-------x-
2031 		 */
2032 		___mark_reg_known(reg, tnum_next);
2033 	}
2034 }
2035 
2036 static void __update_reg_bounds(struct bpf_reg_state *reg)
2037 {
2038 	__update_reg32_bounds(reg);
2039 	__update_reg64_bounds(reg);
2040 }
2041 
2042 static void deduce_bounds_32_from_64(struct bpf_reg_state *reg)
2043 {
2044 	cnum32_intersect_with(&reg->r32, cnum32_from_cnum64(reg->r64));
2045 }
2046 
2047 static void deduce_bounds_64_from_32(struct bpf_reg_state *reg)
2048 {
2049 	reg->r64 = cnum64_cnum32_intersect(reg->r64, reg->r32);
2050 }
2051 
2052 static void __reg_deduce_bounds(struct bpf_reg_state *reg)
2053 {
2054 	deduce_bounds_32_from_64(reg);
2055 	deduce_bounds_64_from_32(reg);
2056 }
2057 
2058 /* Attempts to improve var_off based on unsigned min/max information */
2059 static void __reg_bound_offset(struct bpf_reg_state *reg)
2060 {
2061 	struct tnum var64_off = tnum_intersect(reg->var_off,
2062 					       tnum_range(reg_umin(reg),
2063 							  reg_umax(reg)));
2064 	struct tnum var32_off = tnum_intersect(tnum_subreg(var64_off),
2065 					       tnum_range(reg_u32_min(reg),
2066 							  reg_u32_max(reg)));
2067 
2068 	reg->var_off = tnum_or(tnum_clear_subreg(var64_off), var32_off);
2069 }
2070 
2071 static bool range_bounds_violation(struct bpf_reg_state *reg);
2072 
/*
 * Bring the ranges and var_off of @reg into agreement with each other.
 * Nothing is done when the ranges are already empty (invalid input).
 */
static void reg_bounds_sync(struct bpf_reg_state *reg)
{
	int pass;

	/* An invalid input state cannot be improved, bail out early. */
	if (range_bounds_violation(reg))
		return;

	/* var_off may carry bounds we have not applied yet. */
	__update_reg_bounds(reg);

	/* Two rounds of 32<->64 deduction; we might have learned something
	 * about the sign bit.
	 */
	for (pass = 0; pass < 2; pass++)
		__reg_deduce_bounds(reg);

	/* The bounds may in turn pin down some bits. */
	__reg_bound_offset(reg);

	/* Intersecting with the old var_off might have improved our bounds
	 * slightly, e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
	 * then new var_off is (0; 0x7f...fc) which improves our umax.
	 */
	__update_reg_bounds(reg);
}
2091 
2092 static bool const_tnum_range_mismatch(struct bpf_reg_state *reg)
2093 {
2094 	if (!tnum_is_const(reg->var_off))
2095 		return false;
2096 
2097 	return !cnum64_is_const(reg->r64) || reg->r64.base != reg->var_off.value;
2098 }
2099 
2100 static bool const_tnum_range_mismatch_32(struct bpf_reg_state *reg)
2101 {
2102 	if (!tnum_subreg_is_const(reg->var_off))
2103 		return false;
2104 
2105 	return !cnum32_is_const(reg->r32) || reg->r32.base != tnum_subreg(reg->var_off).value;
2106 }
2107 
2108 static bool range_bounds_violation(struct bpf_reg_state *reg)
2109 {
2110 	return cnum32_is_empty(reg->r32) || cnum64_is_empty(reg->r64);
2111 }
2112 
2113 static int reg_bounds_sanity_check(struct bpf_verifier_env *env,
2114 				   struct bpf_reg_state *reg, const char *ctx)
2115 {
2116 	const char *msg;
2117 
2118 	if (range_bounds_violation(reg)) {
2119 		msg = "range bounds violation";
2120 		goto out;
2121 	}
2122 
2123 	if (const_tnum_range_mismatch(reg)) {
2124 		msg = "const tnum out of sync with range bounds";
2125 		goto out;
2126 	}
2127 
2128 	if (const_tnum_range_mismatch_32(reg)) {
2129 		msg = "const subreg tnum out of sync with range bounds";
2130 		goto out;
2131 	}
2132 
2133 	return 0;
2134 out:
2135 	verifier_bug(env, "REG INVARIANTS VIOLATION (%s): %s r64={.base=%#llx, .size=%#llx} "
2136 		     "r32={.base=%#x, .size=%#x} var_off=(%#llx, %#llx)",
2137 		     ctx, msg,
2138 		     reg->r64.base, reg->r64.size,
2139 		     reg->r32.base, reg->r32.size,
2140 		     reg->var_off.value, reg->var_off.mask);
2141 	if (env->test_reg_invariants)
2142 		return -EFAULT;
2143 	__mark_reg_unbounded(reg);
2144 	return 0;
2145 }
2146 
2147 /* Mark a register as having a completely unknown (scalar) value. */
2148 void bpf_mark_reg_unknown_imprecise(struct bpf_reg_state *reg)
2149 {
2150 	s32 subreg_def = reg->subreg_def;
2151 
2152 	memset(reg, 0, sizeof(*reg));
2153 	reg->type = SCALAR_VALUE;
2154 	reg->var_off = tnum_unknown;
2155 	reg->subreg_def = subreg_def;
2156 	__mark_reg_unbounded(reg);
2157 }
2158 
2159 /* Mark a register as having a completely unknown (scalar) value,
2160  * initialize .precise as true when not bpf capable.
2161  */
2162 static void __mark_reg_unknown(const struct bpf_verifier_env *env,
2163 			       struct bpf_reg_state *reg)
2164 {
2165 	bpf_mark_reg_unknown_imprecise(reg);
2166 	reg->precise = !env->bpf_capable;
2167 }
2168 
2169 static void mark_reg_unknown(struct bpf_verifier_env *env,
2170 			     struct bpf_reg_state *regs, u32 regno)
2171 {
2172 	__mark_reg_unknown(env, regs + regno);
2173 }
2174 
2175 static int __mark_reg_s32_range(struct bpf_verifier_env *env,
2176 				struct bpf_reg_state *regs,
2177 				u32 regno,
2178 				s32 s32_min,
2179 				s32 s32_max)
2180 {
2181 	struct bpf_reg_state *reg = regs + regno;
2182 
2183 	reg_set_srange32(reg,
2184 			 max_t(s32, reg_s32_min(reg), s32_min),
2185 			 min_t(s32, reg_s32_max(reg), s32_max));
2186 	reg_set_srange64(reg,
2187 			 max_t(s64, reg_smin(reg), s32_min),
2188 			 min_t(s64, reg_smax(reg), s32_max));
2189 
2190 	reg_bounds_sync(reg);
2191 
2192 	return reg_bounds_sanity_check(env, reg, "s32_range");
2193 }
2194 
2195 void bpf_mark_reg_not_init(const struct bpf_verifier_env *env,
2196 			   struct bpf_reg_state *reg)
2197 {
2198 	__mark_reg_unknown(env, reg);
2199 	reg->type = NOT_INIT;
2200 }
2201 
2202 static int mark_btf_ld_reg(struct bpf_verifier_env *env,
2203 			   struct bpf_reg_state *regs, u32 regno,
2204 			   enum bpf_reg_type reg_type,
2205 			   struct btf *btf, u32 btf_id,
2206 			   enum bpf_type_flag flag)
2207 {
2208 	switch (reg_type) {
2209 	case SCALAR_VALUE:
2210 		mark_reg_unknown(env, regs, regno);
2211 		return 0;
2212 	case PTR_TO_BTF_ID:
2213 		mark_reg_known_zero(env, regs, regno);
2214 		regs[regno].type = PTR_TO_BTF_ID | flag;
2215 		regs[regno].btf = btf;
2216 		regs[regno].btf_id = btf_id;
2217 		if (type_may_be_null(flag))
2218 			regs[regno].id = ++env->id_gen;
2219 		return 0;
2220 	case PTR_TO_MEM:
2221 		mark_reg_known_zero(env, regs, regno);
2222 		regs[regno].type = PTR_TO_MEM | flag;
2223 		regs[regno].mem_size = 0;
2224 		return 0;
2225 	default:
2226 		verifier_bug(env, "unexpected reg_type %d in %s\n", reg_type, __func__);
2227 		return -EFAULT;
2228 	}
2229 }
2230 
2231 #define DEF_NOT_SUBREG	(0)
2232 static void init_reg_state(struct bpf_verifier_env *env,
2233 			   struct bpf_func_state *state)
2234 {
2235 	struct bpf_reg_state *regs = state->regs;
2236 	int i;
2237 
2238 	for (i = 0; i < MAX_BPF_REG; i++) {
2239 		bpf_mark_reg_not_init(env, &regs[i]);
2240 		regs[i].subreg_def = DEF_NOT_SUBREG;
2241 	}
2242 
2243 	/* frame pointer */
2244 	regs[BPF_REG_FP].type = PTR_TO_STACK;
2245 	mark_reg_known_zero(env, regs, BPF_REG_FP);
2246 	regs[BPF_REG_FP].frameno = state->frameno;
2247 }
2248 
2249 static struct bpf_retval_range retval_range(s32 minval, s32 maxval)
2250 {
2251 	/*
2252 	 * return_32bit is set to false by default and set explicitly
2253 	 * by the caller when necessary.
2254 	 */
2255 	return (struct bpf_retval_range){ minval, maxval, false };
2256 }
2257 
2258 static void init_func_state(struct bpf_verifier_env *env,
2259 			    struct bpf_func_state *state,
2260 			    int callsite, int frameno, int subprogno)
2261 {
2262 	state->callsite = callsite;
2263 	state->frameno = frameno;
2264 	state->subprogno = subprogno;
2265 	state->callback_ret_range = retval_range(0, 0);
2266 	init_reg_state(env, state);
2267 	mark_verifier_state_scratched(env);
2268 }
2269 
2270 /* Similar to push_stack(), but for async callbacks */
2271 static struct bpf_verifier_state *push_async_cb(struct bpf_verifier_env *env,
2272 						int insn_idx, int prev_insn_idx,
2273 						int subprog, bool is_sleepable)
2274 {
2275 	struct bpf_verifier_stack_elem *elem;
2276 	struct bpf_func_state *frame;
2277 
2278 	elem = kzalloc_obj(struct bpf_verifier_stack_elem, GFP_KERNEL_ACCOUNT);
2279 	if (!elem)
2280 		return ERR_PTR(-ENOMEM);
2281 
2282 	elem->insn_idx = insn_idx;
2283 	elem->prev_insn_idx = prev_insn_idx;
2284 	elem->next = env->head;
2285 	elem->log_pos = env->log.end_pos;
2286 	env->head = elem;
2287 	env->stack_size++;
2288 	if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
2289 		verbose(env,
2290 			"The sequence of %d jumps is too complex for async cb.\n",
2291 			env->stack_size);
2292 		return ERR_PTR(-E2BIG);
2293 	}
2294 	/* Unlike push_stack() do not bpf_copy_verifier_state().
2295 	 * The caller state doesn't matter.
2296 	 * This is async callback. It starts in a fresh stack.
2297 	 * Initialize it similar to do_check_common().
2298 	 */
2299 	elem->st.branches = 1;
2300 	elem->st.in_sleepable = is_sleepable;
2301 	frame = kzalloc_obj(*frame, GFP_KERNEL_ACCOUNT);
2302 	if (!frame)
2303 		return ERR_PTR(-ENOMEM);
2304 	init_func_state(env, frame,
2305 			BPF_MAIN_FUNC /* callsite */,
2306 			0 /* frameno within this callchain */,
2307 			subprog /* subprog number within this prog */);
2308 	elem->st.frame[0] = frame;
2309 	return &elem->st;
2310 }
2311 
2312 
2313 static int cmp_subprogs(const void *a, const void *b)
2314 {
2315 	return ((struct bpf_subprog_info *)a)->start -
2316 	       ((struct bpf_subprog_info *)b)->start;
2317 }
2318 
2319 /* Find subprogram that contains instruction at 'off' */
2320 struct bpf_subprog_info *bpf_find_containing_subprog(struct bpf_verifier_env *env, int off)
2321 {
2322 	struct bpf_subprog_info *vals = env->subprog_info;
2323 	int l, r, m;
2324 
2325 	if (off >= env->prog->len || off < 0 || env->subprog_cnt == 0)
2326 		return NULL;
2327 
2328 	l = 0;
2329 	r = env->subprog_cnt - 1;
2330 	while (l < r) {
2331 		m = l + (r - l + 1) / 2;
2332 		if (vals[m].start <= off)
2333 			l = m;
2334 		else
2335 			r = m - 1;
2336 	}
2337 	return &vals[l];
2338 }
2339 
2340 /* Find subprogram that starts exactly at 'off' */
2341 int bpf_find_subprog(struct bpf_verifier_env *env, int off)
2342 {
2343 	struct bpf_subprog_info *p;
2344 
2345 	p = bpf_find_containing_subprog(env, off);
2346 	if (!p || p->start != off)
2347 		return -ENOENT;
2348 	return p - env->subprog_info;
2349 }
2350 
2351 static int add_subprog(struct bpf_verifier_env *env, int off)
2352 {
2353 	int insn_cnt = env->prog->len;
2354 	int ret;
2355 
2356 	if (off >= insn_cnt || off < 0) {
2357 		verbose(env, "call to invalid destination\n");
2358 		return -EINVAL;
2359 	}
2360 	ret = bpf_find_subprog(env, off);
2361 	if (ret >= 0)
2362 		return ret;
2363 	if (env->subprog_cnt >= BPF_MAX_SUBPROGS) {
2364 		verbose(env, "too many subprograms\n");
2365 		return -E2BIG;
2366 	}
2367 	/* determine subprog starts. The end is one before the next starts */
2368 	env->subprog_info[env->subprog_cnt++].start = off;
2369 	sort(env->subprog_info, env->subprog_cnt,
2370 	     sizeof(env->subprog_info[0]), cmp_subprogs, NULL);
2371 	return env->subprog_cnt - 1;
2372 }
2373 
2374 static int bpf_find_exception_callback_insn_off(struct bpf_verifier_env *env)
2375 {
2376 	struct bpf_prog_aux *aux = env->prog->aux;
2377 	struct btf *btf = aux->btf;
2378 	const struct btf_type *t;
2379 	u32 main_btf_id, id;
2380 	const char *name;
2381 	int ret, i;
2382 
2383 	/* Non-zero func_info_cnt implies valid btf */
2384 	if (!aux->func_info_cnt)
2385 		return 0;
2386 	main_btf_id = aux->func_info[0].type_id;
2387 
2388 	t = btf_type_by_id(btf, main_btf_id);
2389 	if (!t) {
2390 		verbose(env, "invalid btf id for main subprog in func_info\n");
2391 		return -EINVAL;
2392 	}
2393 
2394 	name = btf_find_decl_tag_value(btf, t, -1, "exception_callback:");
2395 	if (IS_ERR(name)) {
2396 		ret = PTR_ERR(name);
2397 		/* If there is no tag present, there is no exception callback */
2398 		if (ret == -ENOENT)
2399 			ret = 0;
2400 		else if (ret == -EEXIST)
2401 			verbose(env, "multiple exception callback tags for main subprog\n");
2402 		return ret;
2403 	}
2404 
2405 	ret = btf_find_by_name_kind(btf, name, BTF_KIND_FUNC);
2406 	if (ret < 0) {
2407 		verbose(env, "exception callback '%s' could not be found in BTF\n", name);
2408 		return ret;
2409 	}
2410 	id = ret;
2411 	t = btf_type_by_id(btf, id);
2412 	if (btf_func_linkage(t) != BTF_FUNC_GLOBAL) {
2413 		verbose(env, "exception callback '%s' must have global linkage\n", name);
2414 		return -EINVAL;
2415 	}
2416 	ret = 0;
2417 	for (i = 0; i < aux->func_info_cnt; i++) {
2418 		if (aux->func_info[i].type_id != id)
2419 			continue;
2420 		ret = aux->func_info[i].insn_off;
2421 		/* Further func_info and subprog checks will also happen
2422 		 * later, so assume this is the right insn_off for now.
2423 		 */
2424 		if (!ret) {
2425 			verbose(env, "invalid exception callback insn_off in func_info: 0\n");
2426 			ret = -EINVAL;
2427 		}
2428 	}
2429 	if (!ret) {
2430 		verbose(env, "exception callback type id not found in func_info\n");
2431 		ret = -EINVAL;
2432 	}
2433 	return ret;
2434 }
2435 
2436 #define MAX_KFUNC_BTFS	256
2437 
/* One module BTF referenced by kfunc calls of a program. */
struct bpf_kfunc_btf {
	/* module BTF obtained from an fd in the program's fd_array */
	struct btf *btf;
	/* module reference taken together with @btf */
	struct module *module;
	/* fd_array index the BTF was loaded from; also the sort/search key */
	u16 offset;
};
2443 
/* Per-program table of module BTFs, kept sorted by offset for bsearch(). */
struct bpf_kfunc_btf_tab {
	/* entries [0, nr_descs) are valid */
	struct bpf_kfunc_btf descs[MAX_KFUNC_BTFS];
	/* number of entries in use */
	u32 nr_descs;
};
2448 
2449 static int kfunc_desc_cmp_by_id_off(const void *a, const void *b)
2450 {
2451 	const struct bpf_kfunc_desc *d0 = a;
2452 	const struct bpf_kfunc_desc *d1 = b;
2453 
2454 	/* func_id is not greater than BTF_MAX_TYPE */
2455 	return d0->func_id - d1->func_id ?: d0->offset - d1->offset;
2456 }
2457 
2458 static int kfunc_btf_cmp_by_off(const void *a, const void *b)
2459 {
2460 	const struct bpf_kfunc_btf *d0 = a;
2461 	const struct bpf_kfunc_btf *d1 = b;
2462 
2463 	return d0->offset - d1->offset;
2464 }
2465 
2466 static struct bpf_kfunc_desc *
2467 find_kfunc_desc(const struct bpf_prog *prog, u32 func_id, u16 offset)
2468 {
2469 	struct bpf_kfunc_desc desc = {
2470 		.func_id = func_id,
2471 		.offset = offset,
2472 	};
2473 	struct bpf_kfunc_desc_tab *tab;
2474 
2475 	tab = prog->aux->kfunc_tab;
2476 	return bsearch(&desc, tab->descs, tab->nr_descs,
2477 		       sizeof(tab->descs[0]), kfunc_desc_cmp_by_id_off);
2478 }
2479 
2480 int bpf_get_kfunc_addr(const struct bpf_prog *prog, u32 func_id,
2481 		       u16 btf_fd_idx, u8 **func_addr)
2482 {
2483 	const struct bpf_kfunc_desc *desc;
2484 
2485 	desc = find_kfunc_desc(prog, func_id, btf_fd_idx);
2486 	if (!desc)
2487 		return -EFAULT;
2488 
2489 	*func_addr = (u8 *)desc->addr;
2490 	return 0;
2491 }
2492 
2493 static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env *env,
2494 					 s16 offset)
2495 {
2496 	struct bpf_kfunc_btf kf_btf = { .offset = offset };
2497 	struct bpf_kfunc_btf_tab *tab;
2498 	struct bpf_kfunc_btf *b;
2499 	struct module *mod;
2500 	struct btf *btf;
2501 	int btf_fd;
2502 
2503 	tab = env->prog->aux->kfunc_btf_tab;
2504 	b = bsearch(&kf_btf, tab->descs, tab->nr_descs,
2505 		    sizeof(tab->descs[0]), kfunc_btf_cmp_by_off);
2506 	if (!b) {
2507 		if (tab->nr_descs == MAX_KFUNC_BTFS) {
2508 			verbose(env, "too many different module BTFs\n");
2509 			return ERR_PTR(-E2BIG);
2510 		}
2511 
2512 		if (bpfptr_is_null(env->fd_array)) {
2513 			verbose(env, "kfunc offset > 0 without fd_array is invalid\n");
2514 			return ERR_PTR(-EPROTO);
2515 		}
2516 
2517 		if (copy_from_bpfptr_offset(&btf_fd, env->fd_array,
2518 					    offset * sizeof(btf_fd),
2519 					    sizeof(btf_fd)))
2520 			return ERR_PTR(-EFAULT);
2521 
2522 		btf = btf_get_by_fd(btf_fd);
2523 		if (IS_ERR(btf)) {
2524 			verbose(env, "invalid module BTF fd specified\n");
2525 			return btf;
2526 		}
2527 
2528 		if (!btf_is_module(btf)) {
2529 			verbose(env, "BTF fd for kfunc is not a module BTF\n");
2530 			btf_put(btf);
2531 			return ERR_PTR(-EINVAL);
2532 		}
2533 
2534 		mod = btf_try_get_module(btf);
2535 		if (!mod) {
2536 			btf_put(btf);
2537 			return ERR_PTR(-ENXIO);
2538 		}
2539 
2540 		b = &tab->descs[tab->nr_descs++];
2541 		b->btf = btf;
2542 		b->module = mod;
2543 		b->offset = offset;
2544 
2545 		/* sort() reorders entries by value, so b may no longer point
2546 		 * to the right entry after this
2547 		 */
2548 		sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
2549 		     kfunc_btf_cmp_by_off, NULL);
2550 	} else {
2551 		btf = b->btf;
2552 	}
2553 
2554 	return btf;
2555 }
2556 
2557 void bpf_free_kfunc_btf_tab(struct bpf_kfunc_btf_tab *tab)
2558 {
2559 	if (!tab)
2560 		return;
2561 
2562 	while (tab->nr_descs--) {
2563 		module_put(tab->descs[tab->nr_descs].module);
2564 		btf_put(tab->descs[tab->nr_descs].btf);
2565 	}
2566 	kfree(tab);
2567 }
2568 
2569 static struct btf *find_kfunc_desc_btf(struct bpf_verifier_env *env, s16 offset)
2570 {
2571 	if (offset) {
2572 		if (offset < 0) {
2573 			/* In the future, this can be allowed to increase limit
2574 			 * of fd index into fd_array, interpreted as u16.
2575 			 */
2576 			verbose(env, "negative offset disallowed for kernel module function call\n");
2577 			return ERR_PTR(-EINVAL);
2578 		}
2579 
2580 		return __find_kfunc_desc_btf(env, offset);
2581 	}
2582 	return btf_vmlinux ?: ERR_PTR(-ENOENT);
2583 }
2584 
2585 #define KF_IMPL_SUFFIX "_impl"
2586 
2587 static const struct btf_type *find_kfunc_impl_proto(struct bpf_verifier_env *env,
2588 						    struct btf *btf,
2589 						    const char *func_name)
2590 {
2591 	char *buf = env->tmp_str_buf;
2592 	const struct btf_type *func;
2593 	s32 impl_id;
2594 	int len;
2595 
2596 	len = snprintf(buf, TMP_STR_BUF_LEN, "%s%s", func_name, KF_IMPL_SUFFIX);
2597 	if (len < 0 || len >= TMP_STR_BUF_LEN) {
2598 		verbose(env, "function name %s%s is too long\n", func_name, KF_IMPL_SUFFIX);
2599 		return NULL;
2600 	}
2601 
2602 	impl_id = btf_find_by_name_kind(btf, buf, BTF_KIND_FUNC);
2603 	if (impl_id <= 0) {
2604 		verbose(env, "cannot find function %s in BTF\n", buf);
2605 		return NULL;
2606 	}
2607 
2608 	func = btf_type_by_id(btf, impl_id);
2609 
2610 	return btf_type_by_id(btf, func->type);
2611 }
2612 
2613 static int fetch_kfunc_meta(struct bpf_verifier_env *env,
2614 			    s32 func_id,
2615 			    s16 offset,
2616 			    struct bpf_kfunc_meta *kfunc)
2617 {
2618 	const struct btf_type *func, *func_proto;
2619 	const char *func_name;
2620 	u32 *kfunc_flags;
2621 	struct btf *btf;
2622 
2623 	if (func_id <= 0) {
2624 		verbose(env, "invalid kernel function btf_id %d\n", func_id);
2625 		return -EINVAL;
2626 	}
2627 
2628 	btf = find_kfunc_desc_btf(env, offset);
2629 	if (IS_ERR(btf)) {
2630 		verbose(env, "failed to find BTF for kernel function\n");
2631 		return PTR_ERR(btf);
2632 	}
2633 
2634 	/*
2635 	 * Note that kfunc_flags may be NULL at this point, which
2636 	 * means that we couldn't find func_id in any relevant
2637 	 * kfunc_id_set. This most likely indicates an invalid kfunc
2638 	 * call.  However we don't fail with an error here,
2639 	 * and let the caller decide what to do with NULL kfunc->flags.
2640 	 */
2641 	kfunc_flags = btf_kfunc_flags(btf, func_id, env->prog);
2642 
2643 	func = btf_type_by_id(btf, func_id);
2644 	if (!func || !btf_type_is_func(func)) {
2645 		verbose(env, "kernel btf_id %d is not a function\n", func_id);
2646 		return -EINVAL;
2647 	}
2648 
2649 	func_name = btf_name_by_offset(btf, func->name_off);
2650 
2651 	/*
2652 	 * An actual prototype of a kfunc with KF_IMPLICIT_ARGS flag
2653 	 * can be found through the counterpart _impl kfunc.
2654 	 */
2655 	if (kfunc_flags && (*kfunc_flags & KF_IMPLICIT_ARGS))
2656 		func_proto = find_kfunc_impl_proto(env, btf, func_name);
2657 	else
2658 		func_proto = btf_type_by_id(btf, func->type);
2659 
2660 	if (!func_proto || !btf_type_is_func_proto(func_proto)) {
2661 		verbose(env, "kernel function btf_id %d does not have a valid func_proto\n",
2662 			func_id);
2663 		return -EINVAL;
2664 	}
2665 
2666 	memset(kfunc, 0, sizeof(*kfunc));
2667 	kfunc->btf = btf;
2668 	kfunc->id = func_id;
2669 	kfunc->name = func_name;
2670 	kfunc->proto = func_proto;
2671 	kfunc->flags = kfunc_flags;
2672 
2673 	return 0;
2674 }
2675 
2676 int bpf_add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, u16 offset)
2677 {
2678 	struct bpf_kfunc_btf_tab *btf_tab;
2679 	struct btf_func_model func_model;
2680 	struct bpf_kfunc_desc_tab *tab;
2681 	struct bpf_prog_aux *prog_aux;
2682 	struct bpf_kfunc_meta kfunc;
2683 	struct bpf_kfunc_desc *desc;
2684 	unsigned long addr;
2685 	int err;
2686 
2687 	prog_aux = env->prog->aux;
2688 	tab = prog_aux->kfunc_tab;
2689 	btf_tab = prog_aux->kfunc_btf_tab;
2690 	if (!tab) {
2691 		if (!btf_vmlinux) {
2692 			verbose(env, "calling kernel function is not supported without CONFIG_DEBUG_INFO_BTF\n");
2693 			return -ENOTSUPP;
2694 		}
2695 
2696 		if (!env->prog->jit_requested) {
2697 			verbose(env, "JIT is required for calling kernel function\n");
2698 			return -ENOTSUPP;
2699 		}
2700 
2701 		if (!bpf_jit_supports_kfunc_call()) {
2702 			verbose(env, "JIT does not support calling kernel function\n");
2703 			return -ENOTSUPP;
2704 		}
2705 
2706 		if (!env->prog->gpl_compatible) {
2707 			verbose(env, "cannot call kernel function from non-GPL compatible program\n");
2708 			return -EINVAL;
2709 		}
2710 
2711 		tab = kzalloc_obj(*tab, GFP_KERNEL_ACCOUNT);
2712 		if (!tab)
2713 			return -ENOMEM;
2714 		prog_aux->kfunc_tab = tab;
2715 	}
2716 
2717 	/* func_id == 0 is always invalid, but instead of returning an error, be
2718 	 * conservative and wait until the code elimination pass before returning
2719 	 * error, so that invalid calls that get pruned out can be in BPF programs
2720 	 * loaded from userspace.  It is also required that offset be untouched
2721 	 * for such calls.
2722 	 */
2723 	if (!func_id && !offset)
2724 		return 0;
2725 
2726 	if (!btf_tab && offset) {
2727 		btf_tab = kzalloc_obj(*btf_tab, GFP_KERNEL_ACCOUNT);
2728 		if (!btf_tab)
2729 			return -ENOMEM;
2730 		prog_aux->kfunc_btf_tab = btf_tab;
2731 	}
2732 
2733 	if (find_kfunc_desc(env->prog, func_id, offset))
2734 		return 0;
2735 
2736 	if (tab->nr_descs == MAX_KFUNC_DESCS) {
2737 		verbose(env, "too many different kernel function calls\n");
2738 		return -E2BIG;
2739 	}
2740 
2741 	err = fetch_kfunc_meta(env, func_id, offset, &kfunc);
2742 	if (err)
2743 		return err;
2744 
2745 	addr = kallsyms_lookup_name(kfunc.name);
2746 	if (!addr) {
2747 		verbose(env, "cannot find address for kernel function %s\n", kfunc.name);
2748 		return -EINVAL;
2749 	}
2750 
2751 	if (bpf_dev_bound_kfunc_id(func_id)) {
2752 		err = bpf_dev_bound_kfunc_check(&env->log, prog_aux);
2753 		if (err)
2754 			return err;
2755 	}
2756 
2757 	err = btf_distill_func_proto(&env->log, kfunc.btf, kfunc.proto, kfunc.name, &func_model);
2758 	if (err)
2759 		return err;
2760 
2761 	desc = &tab->descs[tab->nr_descs++];
2762 	desc->func_id = func_id;
2763 	desc->offset = offset;
2764 	desc->addr = addr;
2765 	desc->func_model = func_model;
2766 	sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
2767 	     kfunc_desc_cmp_by_id_off, NULL);
2768 	return 0;
2769 }
2770 
2771 bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog)
2772 {
2773 	return !!prog->aux->kfunc_tab;
2774 }
2775 
2776 static int add_subprog_and_kfunc(struct bpf_verifier_env *env)
2777 {
2778 	struct bpf_subprog_info *subprog = env->subprog_info;
2779 	int i, ret, insn_cnt = env->prog->len, ex_cb_insn;
2780 	struct bpf_insn *insn = env->prog->insnsi;
2781 
2782 	/* Add entry function. */
2783 	ret = add_subprog(env, 0);
2784 	if (ret)
2785 		return ret;
2786 
2787 	for (i = 0; i < insn_cnt; i++, insn++) {
2788 		if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn) &&
2789 		    !bpf_pseudo_kfunc_call(insn))
2790 			continue;
2791 
2792 		if (!env->bpf_capable) {
2793 			verbose(env, "loading/calling other bpf or kernel functions are allowed for CAP_BPF and CAP_SYS_ADMIN\n");
2794 			return -EPERM;
2795 		}
2796 
2797 		if (bpf_pseudo_func(insn) || bpf_pseudo_call(insn))
2798 			ret = add_subprog(env, i + insn->imm + 1);
2799 		else
2800 			ret = bpf_add_kfunc_call(env, insn->imm, insn->off);
2801 
2802 		if (ret < 0)
2803 			return ret;
2804 	}
2805 
2806 	ret = bpf_find_exception_callback_insn_off(env);
2807 	if (ret < 0)
2808 		return ret;
2809 	ex_cb_insn = ret;
2810 
2811 	/* If ex_cb_insn > 0, this means that the main program has a subprog
2812 	 * marked using BTF decl tag to serve as the exception callback.
2813 	 */
2814 	if (ex_cb_insn) {
2815 		ret = add_subprog(env, ex_cb_insn);
2816 		if (ret < 0)
2817 			return ret;
2818 		for (i = 1; i < env->subprog_cnt; i++) {
2819 			if (env->subprog_info[i].start != ex_cb_insn)
2820 				continue;
2821 			env->exception_callback_subprog = i;
2822 			bpf_mark_subprog_exc_cb(env, i);
2823 			break;
2824 		}
2825 	}
2826 
2827 	/* Add a fake 'exit' subprog which could simplify subprog iteration
2828 	 * logic. 'subprog_cnt' should not be increased.
2829 	 */
2830 	subprog[env->subprog_cnt].start = insn_cnt;
2831 
2832 	if (env->log.level & BPF_LOG_LEVEL2)
2833 		for (i = 0; i < env->subprog_cnt; i++)
2834 			verbose(env, "func#%d @%d\n", i, subprog[i].start);
2835 
2836 	return 0;
2837 }
2838 
2839 static int check_subprogs(struct bpf_verifier_env *env)
2840 {
2841 	int i, subprog_start, subprog_end, off, cur_subprog = 0;
2842 	struct bpf_subprog_info *subprog = env->subprog_info;
2843 	struct bpf_insn *insn = env->prog->insnsi;
2844 	int insn_cnt = env->prog->len;
2845 
2846 	/* now check that all jumps are within the same subprog */
2847 	subprog_start = subprog[cur_subprog].start;
2848 	subprog_end = subprog[cur_subprog + 1].start;
2849 	for (i = 0; i < insn_cnt; i++) {
2850 		u8 code = insn[i].code;
2851 
2852 		if (code == (BPF_JMP | BPF_CALL) &&
2853 		    insn[i].src_reg == 0 &&
2854 		    insn[i].imm == BPF_FUNC_tail_call) {
2855 			subprog[cur_subprog].has_tail_call = true;
2856 			subprog[cur_subprog].tail_call_reachable = true;
2857 		}
2858 		if (BPF_CLASS(code) == BPF_LD &&
2859 		    (BPF_MODE(code) == BPF_ABS || BPF_MODE(code) == BPF_IND))
2860 			subprog[cur_subprog].has_ld_abs = true;
2861 		if (BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32)
2862 			goto next;
2863 		if (BPF_OP(code) == BPF_CALL)
2864 			goto next;
2865 		if (BPF_OP(code) == BPF_EXIT) {
2866 			subprog[cur_subprog].exit_idx = i;
2867 			goto next;
2868 		}
2869 		off = i + bpf_jmp_offset(&insn[i]) + 1;
2870 		if (off < subprog_start || off >= subprog_end) {
2871 			verbose(env, "jump out of range from insn %d to %d\n", i, off);
2872 			return -EINVAL;
2873 		}
2874 next:
2875 		if (i == subprog_end - 1) {
2876 			/* to avoid fall-through from one subprog into another
2877 			 * the last insn of the subprog should be either exit
2878 			 * or unconditional jump back or bpf_throw call
2879 			 */
2880 			if (code != (BPF_JMP | BPF_EXIT) &&
2881 			    code != (BPF_JMP32 | BPF_JA) &&
2882 			    code != (BPF_JMP | BPF_JA)) {
2883 				verbose(env, "last insn is not an exit or jmp\n");
2884 				return -EINVAL;
2885 			}
2886 			subprog_start = subprog_end;
2887 			cur_subprog++;
2888 			if (cur_subprog < env->subprog_cnt)
2889 				subprog_end = subprog[cur_subprog + 1].start;
2890 		}
2891 	}
2892 	return 0;
2893 }
2894 
2895 /*
2896  * Sort subprogs in topological order so that leaf subprogs come first and
2897  * their callers come later. This is a DFS post-order traversal of the call
2898  * graph. Scan only reachable instructions (those in the computed postorder) of
2899  * the current subprog to discover callees (direct subprogs and sync
2900  * callbacks).
2901  */
2902 static int sort_subprogs_topo(struct bpf_verifier_env *env)
2903 {
2904 	struct bpf_subprog_info *si = env->subprog_info;
2905 	int *insn_postorder = env->cfg.insn_postorder;
2906 	struct bpf_insn *insn = env->prog->insnsi;
2907 	int cnt = env->subprog_cnt;
2908 	int *dfs_stack = NULL;
2909 	int top = 0, order = 0;
2910 	int i, ret = 0;
2911 	u8 *color = NULL;
2912 
2913 	color = kvzalloc_objs(*color, cnt, GFP_KERNEL_ACCOUNT);
2914 	dfs_stack = kvmalloc_objs(*dfs_stack, cnt, GFP_KERNEL_ACCOUNT);
2915 	if (!color || !dfs_stack) {
2916 		ret = -ENOMEM;
2917 		goto out;
2918 	}
2919 
2920 	/*
2921 	 * DFS post-order traversal.
2922 	 * Color values: 0 = unvisited, 1 = on stack, 2 = done.
2923 	 */
2924 	for (i = 0; i < cnt; i++) {
2925 		if (color[i])
2926 			continue;
2927 		color[i] = 1;
2928 		dfs_stack[top++] = i;
2929 
2930 		while (top > 0) {
2931 			int cur = dfs_stack[top - 1];
2932 			int po_start = si[cur].postorder_start;
2933 			int po_end = si[cur + 1].postorder_start;
2934 			bool pushed = false;
2935 			int j;
2936 
2937 			for (j = po_start; j < po_end; j++) {
2938 				int idx = insn_postorder[j];
2939 				int callee;
2940 
2941 				if (!bpf_pseudo_call(&insn[idx]) && !bpf_pseudo_func(&insn[idx]))
2942 					continue;
2943 				callee = bpf_find_subprog(env, idx + insn[idx].imm + 1);
2944 				if (callee < 0) {
2945 					ret = -EFAULT;
2946 					goto out;
2947 				}
2948 				if (color[callee] == 2)
2949 					continue;
2950 				if (color[callee] == 1) {
2951 					if (bpf_pseudo_func(&insn[idx]))
2952 						continue;
2953 					verbose(env, "recursive call from %s() to %s()\n",
2954 						subprog_name(env, cur),
2955 						subprog_name(env, callee));
2956 					ret = -EINVAL;
2957 					goto out;
2958 				}
2959 				color[callee] = 1;
2960 				dfs_stack[top++] = callee;
2961 				pushed = true;
2962 				break;
2963 			}
2964 
2965 			if (!pushed) {
2966 				color[cur] = 2;
2967 				env->subprog_topo_order[order++] = cur;
2968 				top--;
2969 			}
2970 		}
2971 	}
2972 
2973 	if (env->log.level & BPF_LOG_LEVEL2)
2974 		for (i = 0; i < cnt; i++)
2975 			verbose(env, "topo_order[%d] = %s\n",
2976 				i, subprog_name(env, env->subprog_topo_order[i]));
2977 out:
2978 	kvfree(dfs_stack);
2979 	kvfree(color);
2980 	return ret;
2981 }
2982 
/* Mark 'nr_slots' stack slots as scratched: 'spi' first, then each lower
 * slot index in turn.
 */
static void mark_stack_slots_scratched(struct bpf_verifier_env *env,
				       int spi, int nr_slots)
{
	int slot = spi;
	int remaining;

	for (remaining = nr_slots; remaining > 0; remaining--, slot--)
		mark_stack_slot_scratched(env, slot);
}
2991 
2992 /* This function is supposed to be used by the following 32-bit optimization
2993  * code only. It returns TRUE if the source or destination register operates
2994  * on 64-bit, otherwise return FALSE.
2995  */
2996 bool bpf_is_reg64(struct bpf_insn *insn,
2997 	      u32 regno, struct bpf_reg_state *reg, enum bpf_reg_arg_type t)
2998 {
2999 	u8 code, class, op;
3000 
3001 	code = insn->code;
3002 	class = BPF_CLASS(code);
3003 	op = BPF_OP(code);
3004 	if (class == BPF_JMP) {
3005 		/* BPF_EXIT for "main" will reach here. Return TRUE
3006 		 * conservatively.
3007 		 */
3008 		if (op == BPF_EXIT)
3009 			return true;
3010 		if (op == BPF_CALL) {
3011 			/* BPF to BPF call will reach here because of marking
3012 			 * caller saved clobber with DST_OP_NO_MARK for which we
3013 			 * don't care the register def because they are anyway
3014 			 * marked as NOT_INIT already.
3015 			 */
3016 			if (insn->src_reg == BPF_PSEUDO_CALL)
3017 				return false;
3018 			/* Helper call will reach here because of arg type
3019 			 * check, conservatively return TRUE.
3020 			 */
3021 			if (t == SRC_OP)
3022 				return true;
3023 
3024 			return false;
3025 		}
3026 	}
3027 
3028 	if (class == BPF_ALU64 && op == BPF_END && (insn->imm == 16 || insn->imm == 32))
3029 		return false;
3030 
3031 	if (class == BPF_ALU64 || class == BPF_JMP ||
3032 	    (class == BPF_ALU && op == BPF_END && insn->imm == 64))
3033 		return true;
3034 
3035 	if (class == BPF_ALU || class == BPF_JMP32)
3036 		return false;
3037 
3038 	if (class == BPF_LDX) {
3039 		if (t != SRC_OP)
3040 			return BPF_SIZE(code) == BPF_DW || BPF_MODE(code) == BPF_MEMSX;
3041 		/* LDX source must be ptr. */
3042 		return true;
3043 	}
3044 
3045 	if (class == BPF_STX) {
3046 		/* BPF_STX (including atomic variants) has one or more source
3047 		 * operands, one of which is a ptr. Check whether the caller is
3048 		 * asking about it.
3049 		 */
3050 		if (t == SRC_OP && reg->type != SCALAR_VALUE)
3051 			return true;
3052 		return BPF_SIZE(code) == BPF_DW;
3053 	}
3054 
3055 	if (class == BPF_LD) {
3056 		u8 mode = BPF_MODE(code);
3057 
3058 		/* LD_IMM64 */
3059 		if (mode == BPF_IMM)
3060 			return true;
3061 
3062 		/* Both LD_IND and LD_ABS return 32-bit data. */
3063 		if (t != SRC_OP)
3064 			return  false;
3065 
3066 		/* Implicit ctx ptr. */
3067 		if (regno == BPF_REG_6)
3068 			return true;
3069 
3070 		/* Explicit source could be any width. */
3071 		return true;
3072 	}
3073 
3074 	if (class == BPF_ST)
3075 		/* The only source register for BPF_ST is a ptr. */
3076 		return true;
3077 
3078 	/* Conservatively return true at default. */
3079 	return true;
3080 }
3081 
3082 static void mark_insn_zext(struct bpf_verifier_env *env,
3083 			   struct bpf_reg_state *reg)
3084 {
3085 	s32 def_idx = reg->subreg_def;
3086 
3087 	if (def_idx == DEF_NOT_SUBREG)
3088 		return;
3089 
3090 	env->insn_aux_data[def_idx - 1].zext_dst = true;
3091 	/* The dst will be zero extended, so won't be sub-register anymore. */
3092 	reg->subreg_def = DEF_NOT_SUBREG;
3093 }
3094 
3095 static int __check_reg_arg(struct bpf_verifier_env *env, struct bpf_reg_state *regs, u32 regno,
3096 			   enum bpf_reg_arg_type t)
3097 {
3098 	struct bpf_insn *insn = env->prog->insnsi + env->insn_idx;
3099 	struct bpf_reg_state *reg;
3100 	bool rw64;
3101 
3102 	mark_reg_scratched(env, regno);
3103 
3104 	reg = &regs[regno];
3105 	rw64 = bpf_is_reg64(insn, regno, reg, t);
3106 	if (t == SRC_OP) {
3107 		/* check whether register used as source operand can be read */
3108 		if (reg->type == NOT_INIT) {
3109 			verbose(env, "R%d !read_ok\n", regno);
3110 			return -EACCES;
3111 		}
3112 		/* We don't need to worry about FP liveness because it's read-only */
3113 		if (regno == BPF_REG_FP)
3114 			return 0;
3115 
3116 		if (rw64)
3117 			mark_insn_zext(env, reg);
3118 
3119 		return 0;
3120 	} else {
3121 		/* check whether register used as dest operand can be written to */
3122 		if (regno == BPF_REG_FP) {
3123 			verbose(env, "frame pointer is read only\n");
3124 			return -EACCES;
3125 		}
3126 		reg->subreg_def = rw64 ? DEF_NOT_SUBREG : env->insn_idx + 1;
3127 		if (t == DST_OP)
3128 			mark_reg_unknown(env, regs, regno);
3129 	}
3130 	return 0;
3131 }
3132 
3133 static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
3134 			 enum bpf_reg_arg_type t)
3135 {
3136 	struct bpf_verifier_state *vstate = env->cur_state;
3137 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
3138 
3139 	return __check_reg_arg(env, state->regs, regno, t);
3140 }
3141 
3142 static void mark_indirect_target(struct bpf_verifier_env *env, int idx)
3143 {
3144 	env->insn_aux_data[idx].indirect_target = true;
3145 }
3146 
/*
 * Bit layout used when a set of linked registers is packed into one u64
 * (see linked_regs_pack()/linked_regs_unpack()):
 *  - the low LR_SIZE_BITS bits hold the number of entries;
 *  - each entry takes LR_ENTRY_BITS bits: frame number, then spi/regno
 *    (at LR_SPI_OFF), then a single is_reg bit (at LR_IS_REG_OFF).
 * LINKED_REGS_MAX is the capacity of struct linked_regs.
 */
3147 #define LR_FRAMENO_BITS	4
3148 #define LR_SPI_BITS	6
3149 #define LR_ENTRY_BITS	(LR_SPI_BITS + LR_FRAMENO_BITS + 1)
3150 #define LR_SIZE_BITS	4
3151 #define LR_FRAMENO_MASK	((1ull << LR_FRAMENO_BITS) - 1)
3152 #define LR_SPI_MASK	((1ull << LR_SPI_BITS)     - 1)
3153 #define LR_SIZE_MASK	((1ull << LR_SIZE_BITS)    - 1)
3154 #define LR_SPI_OFF	LR_FRAMENO_BITS
3155 #define LR_IS_REG_OFF	(LR_SPI_BITS + LR_FRAMENO_BITS)
3156 #define LINKED_REGS_MAX	5
3157 
/* Every frame number must fit in the frameno field. */
3158 static_assert(MAX_CALL_FRAMES <= (1 << LR_FRAMENO_BITS));
/* The entry count must fit in the size field. */
3159 static_assert(LINKED_REGS_MAX < (1 << LR_SIZE_BITS));
/* All entries plus the size field must fit in 64 bits. */
3160 static_assert(LINKED_REGS_MAX * LR_ENTRY_BITS + LR_SIZE_BITS <= 64);
3161 
/* One member of a set of linked registers: a register ('regno') or a stack
 * slot ('spi') in frame 'frameno'; 'is_reg' selects which union member is
 * meaningful.
 */
3162 struct linked_reg {
3163 	u8 frameno;
3164 	union {
3165 		u8 spi;
3166 		u8 regno;
3167 	};
3168 	bool is_reg;
3169 };
3170 
/* Fixed-capacity list of linked registers; 'cnt' entries are in use. */
3171 struct linked_regs {
3172 	int cnt;
3173 	struct linked_reg entries[LINKED_REGS_MAX];
3174 };
3175 
3176 static struct linked_reg *linked_regs_push(struct linked_regs *s)
3177 {
3178 	if (s->cnt < LINKED_REGS_MAX)
3179 		return &s->entries[s->cnt++];
3180 
3181 	return NULL;
3182 }
3183 
3184 /*
3185  * Use u64 as a vector of 5 11-bit values, use first 4-bits to track
3186  * number of elements currently in stack.
3187  * Pack one history entry for linked registers as 11 bits in the following format:
3188  * - 4-bits frameno
3189  * - 6-bits spi_or_reg
3190  * - 1-bit  is_reg
3191  */
3192 static u64 linked_regs_pack(struct linked_regs *s)
3193 {
3194 	u64 val = 0;
3195 	int i;
3196 
3197 	for (i = 0; i < s->cnt; ++i) {
3198 		struct linked_reg *e = &s->entries[i];
3199 		u64 tmp = 0;
3200 
3201 		tmp |= e->frameno;
3202 		tmp |= e->spi << LR_SPI_OFF;
3203 		tmp |= (e->is_reg ? 1 : 0) << LR_IS_REG_OFF;
3204 
3205 		val <<= LR_ENTRY_BITS;
3206 		val |= tmp;
3207 	}
3208 	val <<= LR_SIZE_BITS;
3209 	val |= s->cnt;
3210 	return val;
3211 }
3212 
3213 static void linked_regs_unpack(u64 val, struct linked_regs *s)
3214 {
3215 	int i;
3216 
3217 	s->cnt = val & LR_SIZE_MASK;
3218 	val >>= LR_SIZE_BITS;
3219 
3220 	for (i = 0; i < s->cnt; ++i) {
3221 		struct linked_reg *e = &s->entries[i];
3222 
3223 		e->frameno =  val & LR_FRAMENO_MASK;
3224 		e->spi     = (val >> LR_SPI_OFF) & LR_SPI_MASK;
3225 		e->is_reg  = (val >> LR_IS_REG_OFF) & 0x1;
3226 		val >>= LR_ENTRY_BITS;
3227 	}
3228 }
3229 
3230 static const char *disasm_kfunc_name(void *data, const struct bpf_insn *insn)
3231 {
3232 	const struct btf_type *func;
3233 	struct btf *desc_btf;
3234 
3235 	if (insn->src_reg != BPF_PSEUDO_KFUNC_CALL)
3236 		return NULL;
3237 
3238 	desc_btf = find_kfunc_desc_btf(data, insn->off);
3239 	if (IS_ERR(desc_btf))
3240 		return "<error>";
3241 
3242 	func = btf_type_by_id(desc_btf, insn->imm);
3243 	return btf_name_by_offset(desc_btf, func->name_off);
3244 }
3245 
3246 void bpf_verbose_insn(struct bpf_verifier_env *env, struct bpf_insn *insn)
3247 {
3248 	const struct bpf_insn_cbs cbs = {
3249 		.cb_call	= disasm_kfunc_name,
3250 		.cb_print	= verbose,
3251 		.private_data	= env,
3252 	};
3253 
3254 	print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
3255 }
3256 
3257 /* If any register R in hist->linked_regs is marked as precise in bt,
3258  * do bt_set_frame_{reg,slot}(bt, R) for all registers in hist->linked_regs.
3259  */
3260 void bpf_bt_sync_linked_regs(struct backtrack_state *bt, struct bpf_jmp_history_entry *hist)
3261 {
3262 	struct linked_regs linked_regs;
3263 	bool some_precise = false;
3264 	int i;
3265 
3266 	if (!hist || hist->linked_regs == 0)
3267 		return;
3268 
3269 	linked_regs_unpack(hist->linked_regs, &linked_regs);
3270 	for (i = 0; i < linked_regs.cnt; ++i) {
3271 		struct linked_reg *e = &linked_regs.entries[i];
3272 
3273 		if ((e->is_reg && bt_is_frame_reg_set(bt, e->frameno, e->regno)) ||
3274 		    (!e->is_reg && bt_is_frame_slot_set(bt, e->frameno, e->spi))) {
3275 			some_precise = true;
3276 			break;
3277 		}
3278 	}
3279 
3280 	if (!some_precise)
3281 		return;
3282 
3283 	for (i = 0; i < linked_regs.cnt; ++i) {
3284 		struct linked_reg *e = &linked_regs.entries[i];
3285 
3286 		if (e->is_reg)
3287 			bpf_bt_set_frame_reg(bt, e->frameno, e->regno);
3288 		else
3289 			bpf_bt_set_frame_slot(bt, e->frameno, e->spi);
3290 	}
3291 }
3292 
3293 int mark_chain_precision(struct bpf_verifier_env *env, int regno)
3294 {
3295 	return bpf_mark_chain_precision(env, env->cur_state, regno, NULL);
3296 }
3297 
3298 /* mark_chain_precision_batch() assumes that env->bt is set in the caller to
3299  * desired reg and stack masks across all relevant frames
3300  */
3301 static int mark_chain_precision_batch(struct bpf_verifier_env *env,
3302 				      struct bpf_verifier_state *starting_state)
3303 {
3304 	return bpf_mark_chain_precision(env, starting_state, -1, NULL);
3305 }
3306 
3307 static bool is_spillable_regtype(enum bpf_reg_type type)
3308 {
3309 	switch (base_type(type)) {
3310 	case PTR_TO_MAP_VALUE:
3311 	case PTR_TO_STACK:
3312 	case PTR_TO_CTX:
3313 	case PTR_TO_PACKET:
3314 	case PTR_TO_PACKET_META:
3315 	case PTR_TO_PACKET_END:
3316 	case PTR_TO_FLOW_KEYS:
3317 	case CONST_PTR_TO_MAP:
3318 	case PTR_TO_SOCKET:
3319 	case PTR_TO_SOCK_COMMON:
3320 	case PTR_TO_TCP_SOCK:
3321 	case PTR_TO_XDP_SOCK:
3322 	case PTR_TO_BTF_ID:
3323 	case PTR_TO_BUF:
3324 	case PTR_TO_MEM:
3325 	case PTR_TO_FUNC:
3326 	case PTR_TO_MAP_KEY:
3327 	case PTR_TO_ARENA:
3328 		return true;
3329 	default:
3330 		return false;
3331 	}
3332 }
3333 
3334 
3335 /* check if register is a constant scalar value */
3336 static bool is_reg_const(struct bpf_reg_state *reg, bool subreg32)
3337 {
3338 	return reg->type == SCALAR_VALUE &&
3339 	       tnum_is_const(subreg32 ? tnum_subreg(reg->var_off) : reg->var_off);
3340 }
3341 
3342 /* assuming is_reg_const() is true, return constant value of a register */
3343 static u64 reg_const_value(struct bpf_reg_state *reg, bool subreg32)
3344 {
3345 	return subreg32 ? tnum_subreg(reg->var_off).value : reg->var_off.value;
3346 }
3347 
3348 static bool __is_pointer_value(bool allow_ptr_leaks,
3349 			       const struct bpf_reg_state *reg)
3350 {
3351 	if (allow_ptr_leaks)
3352 		return false;
3353 
3354 	return reg->type != SCALAR_VALUE;
3355 }
3356 
3357 static void clear_scalar_id(struct bpf_reg_state *reg)
3358 {
3359 	reg->id = 0;
3360 	reg->delta = 0;
3361 }
3362 
3363 static void assign_scalar_id_before_mov(struct bpf_verifier_env *env,
3364 					struct bpf_reg_state *src_reg)
3365 {
3366 	if (src_reg->type != SCALAR_VALUE)
3367 		return;
3368 	/*
3369 	 * The verifier is processing rX = rY insn and
3370 	 * rY->id has special linked register already.
3371 	 * Cleared it, since multiple rX += const are not supported.
3372 	 */
3373 	if (src_reg->id & BPF_ADD_CONST)
3374 		clear_scalar_id(src_reg);
3375 	/*
3376 	 * Ensure that src_reg has a valid ID that will be copied to
3377 	 * dst_reg and then will be used by sync_linked_regs() to
3378 	 * propagate min/max range.
3379 	 */
3380 	if (!src_reg->id && !tnum_is_const(src_reg->var_off))
3381 		src_reg->id = ++env->id_gen;
3382 }
3383 
3384 static void save_register_state(struct bpf_verifier_env *env,
3385 				struct bpf_func_state *state,
3386 				int spi, struct bpf_reg_state *reg,
3387 				int size)
3388 {
3389 	int i;
3390 
3391 	state->stack[spi].spilled_ptr = *reg;
3392 
3393 	for (i = BPF_REG_SIZE; i > BPF_REG_SIZE - size; i--)
3394 		state->stack[spi].slot_type[i - 1] = STACK_SPILL;
3395 
3396 	/* size < 8 bytes spill */
3397 	for (; i; i--)
3398 		mark_stack_slot_misc(env, &state->stack[spi].slot_type[i - 1]);
3399 }
3400 
3401 static bool is_bpf_st_mem(struct bpf_insn *insn)
3402 {
3403 	return BPF_CLASS(insn->code) == BPF_ST && BPF_MODE(insn->code) == BPF_MEM;
3404 }
3405 
3406 static int get_reg_width(struct bpf_reg_state *reg)
3407 {
3408 	return fls64(reg_umax(reg));
3409 }
3410 
3411 /* See comment for mark_fastcall_pattern_for_call() */
3412 static void check_fastcall_stack_contract(struct bpf_verifier_env *env,
3413 					  struct bpf_func_state *state, int insn_idx, int off)
3414 {
3415 	struct bpf_subprog_info *subprog = &env->subprog_info[state->subprogno];
3416 	struct bpf_insn_aux_data *aux = env->insn_aux_data;
3417 	int i;
3418 
3419 	if (subprog->fastcall_stack_off <= off || aux[insn_idx].fastcall_pattern)
3420 		return;
3421 	/* access to the region [max_stack_depth .. fastcall_stack_off)
3422 	 * from something that is not a part of the fastcall pattern,
3423 	 * disable fastcall rewrites for current subprogram by setting
3424 	 * fastcall_stack_off to a value smaller than any possible offset.
3425 	 */
3426 	subprog->fastcall_stack_off = S16_MIN;
3427 	/* reset fastcall aux flags within subprogram,
3428 	 * happens at most once per subprogram
3429 	 */
3430 	for (i = subprog->start; i < (subprog + 1)->start; ++i) {
3431 		aux[i].fastcall_spills_num = 0;
3432 		aux[i].fastcall_pattern = 0;
3433 	}
3434 }
3435 
3436 static void scrub_special_slot(struct bpf_func_state *state, int spi)
3437 {
3438 	int i;
3439 
3440 	/* regular write of data into stack destroys any spilled ptr */
3441 	state->stack[spi].spilled_ptr.type = NOT_INIT;
3442 	/* Mark slots as STACK_MISC if they belonged to spilled ptr/dynptr/iter. */
3443 	if (is_stack_slot_special(&state->stack[spi]))
3444 		for (i = 0; i < BPF_REG_SIZE; i++)
3445 			scrub_spilled_slot(&state->stack[spi].slot_type[i]);
3446 }
3447 
3448 /* check_stack_{read,write}_fixed_off functions track spill/fill of registers,
3449  * stack boundary and alignment are checked in check_mem_access()
3450  */
/* Track a write of 'size' bytes at the fixed (negative) stack offset 'off'
 * into the frame 'state', performed by instruction 'insn_idx'.
 *
 * 'value_regno' names the source register in the current frame; when it is
 * negative no register is involved and, for a BPF_ST instruction, insn->imm
 * is the value being stored.
 *
 * Return: 0 on success, -EACCES or -EINVAL when the write is rejected, or an
 * error from destroy_if_dynptr_stack_slot(), mark_chain_precision() or
 * bpf_push_jmp_history().
 */
3451 static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
3452 				       /* stack frame we're writing to */
3453 				       struct bpf_func_state *state,
3454 				       int off, int size, int value_regno,
3455 				       int insn_idx)
3456 {
3457 	struct bpf_func_state *cur; /* state of the current function */
	/* 'slot' is the byte index of 'off' counted from the top of the frame,
	 * 'spi' the index of the BPF_REG_SIZE-byte stack slot containing it.
	 */
3458 	int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
3459 	struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
3460 	struct bpf_reg_state *reg = NULL;
3461 	int insn_flags = INSN_F_STACK_ACCESS;
3462 	int hist_spi = spi, hist_frame = state->frameno;
3463 
3464 	/* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
3465 	 * so it's aligned access and [off, off + size) are within stack limits
3466 	 */
	/* Without allow_ptr_leaks, a partial overwrite of a spilled
	 * non-scalar register is refused.
	 */
3467 	if (!env->allow_ptr_leaks &&
3468 	    bpf_is_spilled_reg(&state->stack[spi]) &&
3469 	    !bpf_is_spilled_scalar_reg(&state->stack[spi]) &&
3470 	    size != BPF_REG_SIZE) {
3471 		verbose(env, "attempt to corrupt spilled pointer on stack\n");
3472 		return -EACCES;
3473 	}
3474 
3475 	cur = env->cur_state->frame[env->cur_state->curframe];
3476 	if (value_regno >= 0)
3477 		reg = &cur->regs[value_regno];
	/* Unless spec v4 mitigation is bypassed, flag the instruction with
	 * nospec_result when a spillable pointer is being stored or when any
	 * overwritten byte is neither STACK_MISC nor STACK_ZERO.
	 */
3478 	if (!env->bypass_spec_v4) {
3479 		bool sanitize = reg && is_spillable_regtype(reg->type);
3480 
3481 		for (i = 0; i < size; i++) {
3482 			u8 type = state->stack[spi].slot_type[(slot - i) %
3483 							      BPF_REG_SIZE];
3484 
3485 			if (type != STACK_MISC && type != STACK_ZERO) {
3486 				sanitize = true;
3487 				break;
3488 			}
3489 		}
3490 
3491 		if (sanitize)
3492 			env->insn_aux_data[insn_idx].nospec_result = true;
3493 	}
3494 
3495 	err = destroy_if_dynptr_stack_slot(env, state, spi);
3496 	if (err)
3497 		return err;
3498 
3499 	check_fastcall_stack_contract(env, state, insn_idx, off);
3500 	mark_stack_slot_scratched(env, spi);
	/* Case 1: scalar register stored at a slot-aligned offset (bpf_capable
	 * only) is tracked as a register spill.
	 */
3501 	if (reg && !(off % BPF_REG_SIZE) && reg->type == SCALAR_VALUE && env->bpf_capable) {
3502 		bool reg_value_fits;
3503 
3504 		reg_value_fits = get_reg_width(reg) <= BITS_PER_BYTE * size;
3505 		/* Make sure that reg had an ID to build a relation on spill. */
3506 		if (reg_value_fits)
3507 			assign_scalar_id_before_mov(env, reg);
3508 		save_register_state(env, state, spi, reg, size);
3509 		/* Break the relation on a narrowing spill. */
3510 		if (!reg_value_fits)
3511 			state->stack[spi].spilled_ptr.id = 0;
	/* Case 2: BPF_ST of an immediate at a slot-aligned offset (bpf_capable
	 * only) is tracked as a spill of a known scalar built in
	 * env->fake_reg[0].
	 */
3512 	} else if (!reg && !(off % BPF_REG_SIZE) && is_bpf_st_mem(insn) &&
3513 		   env->bpf_capable) {
3514 		struct bpf_reg_state *tmp_reg = &env->fake_reg[0];
3515 
3516 		memset(tmp_reg, 0, sizeof(*tmp_reg));
3517 		__mark_reg_known(tmp_reg, insn->imm);
3518 		tmp_reg->type = SCALAR_VALUE;
3519 		save_register_state(env, state, spi, tmp_reg, size);
	/* Case 3: spill of a register holding a spillable pointer type. */
3520 	} else if (reg && is_spillable_regtype(reg->type)) {
3521 		/* register containing pointer is being spilled into stack */
3522 		if (size != BPF_REG_SIZE) {
3523 			verbose_linfo(env, insn_idx, "; ");
3524 			verbose(env, "invalid size of register spill\n");
3525 			return -EACCES;
3526 		}
3527 		if (state != cur && reg->type == PTR_TO_STACK) {
3528 			verbose(env, "cannot spill pointers to stack into stack frame of the caller\n");
3529 			return -EINVAL;
3530 		}
3531 		save_register_state(env, state, spi, reg, size);
	/* Case 4: anything else is a plain data write, tracked per byte as
	 * STACK_MISC or STACK_ZERO.
	 */
3532 	} else {
3533 		u8 type = STACK_MISC;
3534 
3535 		scrub_special_slot(state, spi);
3536 
3537 		/* when we zero initialize stack slots mark them as such */
3538 		if ((reg && bpf_register_is_null(reg)) ||
3539 		    (!reg && is_bpf_st_mem(insn) && insn->imm == 0)) {
3540 			/* STACK_ZERO case happened because register spill
3541 			 * wasn't properly aligned at the stack slot boundary,
3542 			 * so it's not a register spill anymore; force
3543 			 * originating register to be precise to make
3544 			 * STACK_ZERO correct for subsequent states
3545 			 */
3546 			err = mark_chain_precision(env, value_regno);
3547 			if (err)
3548 				return err;
3549 			type = STACK_ZERO;
3550 		}
3551 
3552 		/* Mark slots affected by this stack write. */
3553 		for (i = 0; i < size; i++)
3554 			state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] = type;
3555 		insn_flags = 0; /* not a register spill */
3556 	}
3557 
	/* Only register spills are recorded in the jump history. */
3558 	if (insn_flags)
3559 		return bpf_push_jmp_history(env, env->cur_state, insn_flags,
3560 					    hist_spi, hist_frame, 0);
3561 	return 0;
3562 }
3563 
3564 /* Write the stack: 'stack[ptr_reg + off] = value_regno'. 'ptr_reg' is
3565  * known to contain a variable offset.
3566  * This function checks whether the write is permitted and conservatively
3567  * tracks the effects of the write, considering that each stack slot in the
3568  * dynamic range is potentially written to.
3569  *
3570  * 'value_regno' can be -1, meaning that an unknown value is being written to
3571  * the stack.
3572  *
3573  * Spilled pointers in range are not marked as written because we don't know
3574  * what's going to be actually written. This means that read propagation for
3575  * future reads cannot be terminated by this write.
3576  *
3577  * For privileged programs, uninitialized stack slots are considered
3578  * initialized by this write (even though we don't know exactly what offsets
3579  * are going to be written to). The idea is that we don't want the verifier to
3580  * reject future reads that access slots written to through variable offsets.
3581  */
3582 static int check_stack_write_var_off(struct bpf_verifier_env *env,
3583 				     /* func where register points to */
3584 				     struct bpf_func_state *state,
3585 				     struct bpf_reg_state *ptr_reg, int off, int size,
3586 				     int value_regno, int insn_idx)
3587 {
3588 	struct bpf_func_state *cur; /* state of the current function */
3589 	int min_off, max_off;
3590 	int i, err;
3591 	struct bpf_reg_state *value_reg = NULL;
3592 	struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
3593 	bool writing_zero = false;
3594 	/* set if the fact that we're writing a zero is used to let any
3595 	 * stack slots remain STACK_ZERO
3596 	 */
3597 	bool zero_used = false;
3598 
3599 	cur = env->cur_state->frame[env->cur_state->curframe];
3600 	min_off = reg_smin(ptr_reg) + off;
3601 	max_off = reg_smax(ptr_reg) + off + size;
3602 	if (value_regno >= 0)
3603 		value_reg = &cur->regs[value_regno];
3604 	if ((value_reg && bpf_register_is_null(value_reg)) ||
3605 	    (!value_reg && is_bpf_st_mem(insn) && insn->imm == 0))
3606 		writing_zero = true;
3607 
3608 	for (i = min_off; i < max_off; i++) {
3609 		int spi;
3610 
3611 		spi = bpf_get_spi(i);
3612 		err = destroy_if_dynptr_stack_slot(env, state, spi);
3613 		if (err)
3614 			return err;
3615 	}
3616 
3617 	check_fastcall_stack_contract(env, state, insn_idx, min_off);
3618 	/* Variable offset writes destroy any spilled pointers in range. */
3619 	for (i = min_off; i < max_off; i++) {
3620 		u8 new_type, *stype;
3621 		int slot, spi;
3622 
3623 		slot = -i - 1;
3624 		spi = slot / BPF_REG_SIZE;
3625 		stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
3626 		mark_stack_slot_scratched(env, spi);
3627 
3628 		if (!env->allow_ptr_leaks && *stype != STACK_MISC && *stype != STACK_ZERO) {
3629 			/* Reject the write if range we may write to has not
3630 			 * been initialized beforehand. If we didn't reject
3631 			 * here, the ptr status would be erased below (even
3632 			 * though not all slots are actually overwritten),
3633 			 * possibly opening the door to leaks.
3634 			 *
3635 			 * We do however catch STACK_INVALID case below, and
3636 			 * only allow reading possibly uninitialized memory
3637 			 * later for CAP_PERFMON, as the write may not happen to
3638 			 * that slot.
3639 			 */
3640 			verbose(env, "spilled ptr in range of var-offset stack write; insn %d, ptr off: %d",
3641 				insn_idx, i);
3642 			return -EINVAL;
3643 		}
3644 
3645 		/* If writing_zero and the spi slot contains a spill of value 0,
3646 		 * maintain the spill type.
3647 		 */
3648 		if (writing_zero && *stype == STACK_SPILL &&
3649 		    bpf_is_spilled_scalar_reg(&state->stack[spi])) {
3650 			struct bpf_reg_state *spill_reg = &state->stack[spi].spilled_ptr;
3651 
3652 			if (tnum_is_const(spill_reg->var_off) && spill_reg->var_off.value == 0) {
3653 				zero_used = true;
3654 				continue;
3655 			}
3656 		}
3657 
3658 		/*
3659 		 * Scrub slots if variable-offset stack write goes over spilled pointers.
3660 		 * Otherwise bpf_is_spilled_reg() may == true && spilled_ptr.type == NOT_INIT
3661 		 * and valid program is rejected by check_stack_read_fixed_off()
3662 		 * with obscure "invalid size of register fill" message.
3663 		 */
3664 		scrub_special_slot(state, spi);
3665 
3666 		/* Update the slot type. */
3667 		new_type = STACK_MISC;
3668 		if (writing_zero && *stype == STACK_ZERO) {
3669 			new_type = STACK_ZERO;
3670 			zero_used = true;
3671 		}
3672 		/* If the slot is STACK_INVALID, we check whether it's OK to
3673 		 * pretend that it will be initialized by this write. The slot
3674 		 * might not actually be written to, and so if we mark it as
3675 		 * initialized future reads might leak uninitialized memory.
3676 		 * For privileged programs, we will accept such reads to slots
3677 		 * that may or may not be written because, if we're reject
3678 		 * them, the error would be too confusing.
3679 		 * Conservatively, treat STACK_POISON in a similar way.
3680 		 */
3681 		if ((*stype == STACK_INVALID || *stype == STACK_POISON) &&
3682 		    !env->allow_uninit_stack) {
3683 			verbose(env, "uninit stack in range of var-offset write prohibited for !root; insn %d, off: %d",
3684 					insn_idx, i);
3685 			return -EINVAL;
3686 		}
3687 		*stype = new_type;
3688 	}
3689 	if (zero_used) {
3690 		/* backtracking doesn't work for STACK_ZERO yet. */
3691 		err = mark_chain_precision(env, value_regno);
3692 		if (err)
3693 			return err;
3694 	}
3695 	return 0;
3696 }
3697 
3698 /* When register 'dst_regno' is assigned some values from stack[min_off,
3699  * max_off), we set the register's type according to the types of the
3700  * respective stack slots. If all the stack values are known to be zeros, then
3701  * so is the destination reg. Otherwise, the register is considered to be
3702  * SCALAR. This function does not deal with register filling; the caller must
3703  * ensure that all spilled registers in the stack range have been marked as
3704  * read.
3705  */
3706 static void mark_reg_stack_read(struct bpf_verifier_env *env,
3707 				/* func where src register points to */
3708 				struct bpf_func_state *ptr_state,
3709 				int min_off, int max_off, int dst_regno)
3710 {
3711 	struct bpf_verifier_state *vstate = env->cur_state;
3712 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
3713 	int i, slot, spi;
3714 	u8 *stype;
3715 	int zeros = 0;
3716 
3717 	for (i = min_off; i < max_off; i++) {
3718 		slot = -i - 1;
3719 		spi = slot / BPF_REG_SIZE;
3720 		mark_stack_slot_scratched(env, spi);
3721 		stype = ptr_state->stack[spi].slot_type;
3722 		if (stype[slot % BPF_REG_SIZE] != STACK_ZERO)
3723 			break;
3724 		zeros++;
3725 	}
3726 	if (zeros == max_off - min_off) {
3727 		/* Any access_size read into register is zero extended,
3728 		 * so the whole register == const_zero.
3729 		 */
3730 		__mark_reg_const_zero(env, &state->regs[dst_regno]);
3731 	} else {
3732 		/* have read misc data from the stack */
3733 		mark_reg_unknown(env, state->regs, dst_regno);
3734 	}
3735 }
3736 
3737 /* Read the stack at 'off' and put the results into the register indicated by
3738  * 'dst_regno'. It handles reg filling if the addressed stack slot is a
3739  * spilled reg.
3740  *
3741  * 'dst_regno' can be -1, meaning that the read value is not going to a
3742  * register.
3743  *
3744  * The access is assumed to be within the current stack bounds.
3745  */
/* Return: 0 on success, -EACCES when the read is rejected, or the result of
 * bpf_push_jmp_history() when the read is recorded as a register fill.
 */
3746 static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
3747 				      /* func where src register points to */
3748 				      struct bpf_func_state *reg_state,
3749 				      int off, int size, int dst_regno)
3750 {
3751 	struct bpf_verifier_state *vstate = env->cur_state;
3752 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
	/* 'slot' is the byte index of 'off' counted from the top of the frame,
	 * 'spi' the index of the BPF_REG_SIZE-byte stack slot containing it.
	 */
3753 	int i, slot = -off - 1, spi = slot / BPF_REG_SIZE;
3754 	struct bpf_reg_state *reg;
3755 	u8 *stype, type;
3756 	int insn_flags = INSN_F_STACK_ACCESS;
3757 	int hist_spi = spi, hist_frame = reg_state->frameno;
3758 
3759 	stype = reg_state->stack[spi].slot_type;
3760 	reg = &reg_state->stack[spi].spilled_ptr;
3761 
3762 	mark_stack_slot_scratched(env, spi);
3763 	check_fastcall_stack_contract(env, state, env->insn_idx, off);
3764 
3765 	if (bpf_is_spilled_reg(&reg_state->stack[spi])) {
3766 		u8 spill_size = 1;
3767 
		/* Count the STACK_SPILL bytes from the top of the slot downwards. */
3768 		for (i = BPF_REG_SIZE - 1; i > 0 && stype[i - 1] == STACK_SPILL; i--)
3769 			spill_size++;
3770 
		/* Either the read or the spill is narrower than a full slot. */
3771 		if (size != BPF_REG_SIZE || spill_size != BPF_REG_SIZE) {
3772 			if (reg->type != SCALAR_VALUE) {
3773 				verbose_linfo(env, env->insn_idx, "; ");
3774 				verbose(env, "invalid size of register fill\n");
3775 				return -EACCES;
3776 			}
3777 
3778 			if (dst_regno < 0)
3779 				return 0;
3780 
3781 			if (size <= spill_size &&
3782 			    bpf_stack_narrow_access_ok(off, size, spill_size)) {
3783 				/* The earlier check_reg_arg() has decided the
3784 				 * subreg_def for this insn.  Save it first.
3785 				 */
3786 				s32 subreg_def = state->regs[dst_regno].subreg_def;
3787 
3788 				if (env->bpf_capable && size == 4 && spill_size == 4 &&
3789 				    get_reg_width(reg) <= 32)
3790 					/* Ensure stack slot has an ID to build a relation
3791 					 * with the destination register on fill.
3792 					 */
3793 					assign_scalar_id_before_mov(env, reg);
3794 				state->regs[dst_regno] = *reg;
3795 				state->regs[dst_regno].subreg_def = subreg_def;
3796 
3797 				/* Break the relation on a narrowing fill.
3798 				 * coerce_reg_to_size will adjust the boundaries.
3799 				 */
3800 				if (get_reg_width(reg) > size * BITS_PER_BYTE)
3801 					clear_scalar_id(&state->regs[dst_regno]);
3802 			} else {
				/* The read does not line up with the spill:
				 * classify each byte that is read.
				 */
3803 				int spill_cnt = 0, zero_cnt = 0;
3804 
3805 				for (i = 0; i < size; i++) {
3806 					type = stype[(slot - i) % BPF_REG_SIZE];
3807 					if (type == STACK_SPILL) {
3808 						spill_cnt++;
3809 						continue;
3810 					}
3811 					if (type == STACK_MISC)
3812 						continue;
3813 					if (type == STACK_ZERO) {
3814 						zero_cnt++;
3815 						continue;
3816 					}
3817 					if (type == STACK_INVALID && env->allow_uninit_stack)
3818 						continue;
3819 					if (type == STACK_POISON) {
3820 						verbose(env, "reading from stack off %d+%d size %d, slot poisoned by dead code elimination\n",
3821 							off, i, size);
3822 					} else {
3823 						verbose(env, "invalid read from stack off %d+%d size %d\n",
3824 							off, i, size);
3825 					}
3826 					return -EACCES;
3827 				}
3828 
3829 				if (spill_cnt == size &&
3830 				    tnum_is_const(reg->var_off) && reg->var_off.value == 0) {
3831 					__mark_reg_const_zero(env, &state->regs[dst_regno]);
3832 					/* this IS register fill, so keep insn_flags */
3833 				} else if (zero_cnt == size) {
3834 					/* similarly to mark_reg_stack_read(), preserve zeroes */
3835 					__mark_reg_const_zero(env, &state->regs[dst_regno]);
3836 					insn_flags = 0; /* not restoring original register state */
3837 				} else {
3838 					mark_reg_unknown(env, state->regs, dst_regno);
3839 					insn_flags = 0; /* not restoring original register state */
3840 				}
3841 			}
3842 		} else if (dst_regno >= 0) {
3843 			/* restore register state from stack */
3844 			if (env->bpf_capable)
3845 				/* Ensure stack slot has an ID to build a relation
3846 				 * with the destination register on fill.
3847 				 */
3848 				assign_scalar_id_before_mov(env, reg);
3849 			state->regs[dst_regno] = *reg;
3850 			/* NOTE(review): nothing follows the copy above; the
3851 			 * comment that used to sit here spoke of marking the
3852 			 * register as written - confirm no such step is needed.
3853 			 */
3854 		} else if (__is_pointer_value(env->allow_ptr_leaks, reg)) {
3855 			/* If dst_regno==-1, the caller is asking us whether
3856 			 * it is acceptable to use this value as a SCALAR_VALUE
3857 			 * (e.g. for XADD).
3858 			 * We must not allow unprivileged callers to do that
3859 			 * with spilled pointers.
3860 			 */
3861 			verbose(env, "leaking pointer from stack off %d\n",
3862 				off);
3863 			return -EACCES;
3864 		}
3865 	} else {
		/* No spilled register in this slot: every byte read must be
		 * STACK_MISC, STACK_ZERO, or STACK_INVALID with
		 * allow_uninit_stack set.
		 */
3866 		for (i = 0; i < size; i++) {
3867 			type = stype[(slot - i) % BPF_REG_SIZE];
3868 			if (type == STACK_MISC)
3869 				continue;
3870 			if (type == STACK_ZERO)
3871 				continue;
3872 			if (type == STACK_INVALID && env->allow_uninit_stack)
3873 				continue;
3874 			if (type == STACK_POISON) {
3875 				verbose(env, "reading from stack off %d+%d size %d, slot poisoned by dead code elimination\n",
3876 					off, i, size);
3877 			} else {
3878 				verbose(env, "invalid read from stack off %d+%d size %d\n",
3879 					off, i, size);
3880 			}
3881 			return -EACCES;
3882 		}
3883 		if (dst_regno >= 0)
3884 			mark_reg_stack_read(env, reg_state, off, off + size, dst_regno);
3885 		insn_flags = 0; /* we are not restoring spilled register */
3886 	}
	/* Only register fills are recorded in the jump history. */
3887 	if (insn_flags)
3888 		return bpf_push_jmp_history(env, env->cur_state, insn_flags,
3889 					    hist_spi, hist_frame, 0);
3890 	return 0;
3891 }
3892 
/* Who performs the memory access being checked. check_map_access() only
 * lets ACCESS_DIRECT accesses touch kptr/uptr fields of a map value.
 */
enum bpf_access_src {
	ACCESS_DIRECT = 1,  /* the access is performed by an instruction */
	ACCESS_HELPER = 2,  /* the access is performed by a helper */
};
3897 
/* Forward declaration: check_stack_read_var_off() below calls this function,
 * so its prototype has to be visible at this point.
 */
static int check_stack_range_initialized(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
					 argno_t argno, int off, int access_size,
					 bool zero_size_allowed,
					 enum bpf_access_type type,
					 struct bpf_call_arg_meta *meta);
3903 
3904 static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno)
3905 {
3906 	return cur_regs(env) + regno;
3907 }
3908 
3909 /* Read the stack at 'reg + off' and put the result into the register
3910  * 'dst_regno'.
3911  * 'off' includes the pointer register's fixed offset(i.e. 'reg->off'),
3912  * but not its variable offset.
3913  * 'size' is assumed to be <= reg size and the access is assumed to be aligned.
3914  *
3915  * As opposed to check_stack_read_fixed_off, this function doesn't deal with
3916  * filling registers (i.e. reads of spilled register cannot be detected when
3917  * the offset is not fixed). We conservatively mark 'dst_regno' as containing
3918  * SCALAR_VALUE. That's why we assert that the 'reg' has a variable
3919  * offset; for a fixed offset check_stack_read_fixed_off should be used
3920  * instead.
3921  */
3922 static int check_stack_read_var_off(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
3923 				    argno_t ptr_argno, int off, int size, int dst_regno)
3924 {
3925 	struct bpf_func_state *ptr_state = bpf_func(env, reg);
3926 	int err;
3927 	int min_off, max_off;
3928 
3929 	/* Note that we pass a NULL meta, so raw access will not be permitted.
3930 	 */
3931 	err = check_stack_range_initialized(env, reg, ptr_argno, off, size,
3932 					    false, BPF_READ, NULL);
3933 	if (err)
3934 		return err;
3935 
3936 	min_off = reg_smin(reg) + off;
3937 	max_off = reg_smax(reg) + off;
3938 	mark_reg_stack_read(env, ptr_state, min_off, max_off + size, dst_regno);
3939 	check_fastcall_stack_contract(env, ptr_state, env->insn_idx, min_off);
3940 	return 0;
3941 }
3942 
3943 /* check_stack_read dispatches to check_stack_read_fixed_off or
3944  * check_stack_read_var_off.
3945  *
3946  * The caller must ensure that the offset falls within the allocated stack
3947  * bounds.
3948  *
3949  * 'dst_regno' is a register which will receive the value from the stack. It
3950  * can be -1, meaning that the read value is not going to a register.
3951  */
3952 static int check_stack_read(struct bpf_verifier_env *env,
3953 			    struct bpf_reg_state *reg, argno_t ptr_argno, int off, int size,
3954 			    int dst_regno)
3955 {
3956 	struct bpf_func_state *state = bpf_func(env, reg);
3957 	int err;
3958 	/* Some accesses are only permitted with a static offset. */
3959 	bool var_off = !tnum_is_const(reg->var_off);
3960 
3961 	/* The offset is required to be static when reads don't go to a
3962 	 * register, in order to not leak pointers (see
3963 	 * check_stack_read_fixed_off).
3964 	 */
3965 	if (dst_regno < 0 && var_off) {
3966 		char tn_buf[48];
3967 
3968 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3969 		verbose(env, "variable offset stack pointer cannot be passed into helper function; var_off=%s off=%d size=%d\n",
3970 			tn_buf, off, size);
3971 		return -EACCES;
3972 	}
3973 	/* Variable offset is prohibited for unprivileged mode for simplicity
3974 	 * since it requires corresponding support in Spectre masking for stack
3975 	 * ALU. See also retrieve_ptr_limit(). The check in
3976 	 * check_stack_access_for_ptr_arithmetic() called by
3977 	 * adjust_ptr_min_max_vals() prevents users from creating stack pointers
3978 	 * with variable offsets, therefore no check is required here. Further,
3979 	 * just checking it here would be insufficient as speculative stack
3980 	 * writes could still lead to unsafe speculative behaviour.
3981 	 */
3982 	if (!var_off) {
3983 		off += reg->var_off.value;
3984 		err = check_stack_read_fixed_off(env, state, off, size,
3985 						 dst_regno);
3986 	} else {
3987 		/* Variable offset stack reads need more conservative handling
3988 		 * than fixed offset ones. Note that dst_regno >= 0 on this
3989 		 * branch.
3990 		 */
3991 		err = check_stack_read_var_off(env, reg, ptr_argno, off, size,
3992 					       dst_regno);
3993 	}
3994 	return err;
3995 }
3996 
3997 
3998 /* check_stack_write dispatches to check_stack_write_fixed_off or
3999  * check_stack_write_var_off.
4000  *
4001  * 'reg' is the register used as a pointer into the stack.
4002  * 'value_regno' is the register whose value we're writing to the stack. It can
4003  * be -1, meaning that we're not writing from a register.
4004  *
4005  * The caller must ensure that the offset falls within the maximum stack size.
4006  */
4007 static int check_stack_write(struct bpf_verifier_env *env,
4008 			     struct bpf_reg_state *reg, int off, int size,
4009 			     int value_regno, int insn_idx)
4010 {
4011 	struct bpf_func_state *state = bpf_func(env, reg);
4012 	int err;
4013 
4014 	if (tnum_is_const(reg->var_off)) {
4015 		off += reg->var_off.value;
4016 		err = check_stack_write_fixed_off(env, state, off, size,
4017 						  value_regno, insn_idx);
4018 	} else {
4019 		/* Variable offset stack reads need more conservative handling
4020 		 * than fixed offset ones.
4021 		 */
4022 		err = check_stack_write_var_off(env, state,
4023 						reg, off, size,
4024 						value_regno, insn_idx);
4025 	}
4026 	return err;
4027 }
4028 
4029 /*
4030  * Write a value to the outgoing stack arg area.
4031  * off is a negative offset from r11 (e.g. -8 for arg6, -16 for arg7).
4032  */
4033 static int check_stack_arg_write(struct bpf_verifier_env *env, struct bpf_func_state *state,
4034 				 int off, struct bpf_reg_state *value_reg)
4035 {
4036 	int max_stack_arg_regs = MAX_BPF_FUNC_ARGS - MAX_BPF_FUNC_REG_ARGS;
4037 	struct bpf_subprog_info *subprog = &env->subprog_info[state->subprogno];
4038 	int spi = -off / BPF_REG_SIZE - 1;
4039 	struct bpf_reg_state *arg;
4040 	int err;
4041 
4042 	if (spi >= max_stack_arg_regs) {
4043 		verbose(env, "stack arg write offset %d exceeds max %d stack args\n",
4044 			off, max_stack_arg_regs);
4045 		return -EINVAL;
4046 	}
4047 
4048 	err = grow_stack_arg_slots(env, state, spi + 1);
4049 	if (err)
4050 		return err;
4051 
4052 	/* Track the max outgoing stack arg slot count. */
4053 	if (spi + 1 > subprog->max_out_stack_arg_cnt)
4054 		subprog->max_out_stack_arg_cnt = spi + 1;
4055 
4056 	if (value_reg) {
4057 		state->stack_arg_regs[spi] = *value_reg;
4058 	} else {
4059 		/* BPF_ST: store immediate, treat as scalar */
4060 		arg = &state->stack_arg_regs[spi];
4061 		arg->type = SCALAR_VALUE;
4062 		__mark_reg_known(arg, env->prog->insnsi[env->insn_idx].imm);
4063 	}
4064 	state->no_stack_arg_load = true;
4065 	return bpf_push_jmp_history(env, env->cur_state,
4066 				    INSN_F_STACK_ARG_ACCESS, spi, 0, 0);
4067 }
4068 
4069 /*
4070  * Read a value from the incoming stack arg area.
4071  * off is a positive offset from r11 (e.g. +8 for arg6, +16 for arg7).
4072  */
4073 static int check_stack_arg_read(struct bpf_verifier_env *env, struct bpf_func_state *state,
4074 				int off, int dst_regno)
4075 {
4076 	struct bpf_subprog_info *subprog = &env->subprog_info[state->subprogno];
4077 	struct bpf_verifier_state *vstate = env->cur_state;
4078 	int spi = off / BPF_REG_SIZE - 1;
4079 	struct bpf_func_state *caller, *cur;
4080 	struct bpf_reg_state *arg;
4081 
4082 	if (state->no_stack_arg_load) {
4083 		verbose(env, "r11 load must be before any r11 store or call insn\n");
4084 		return -EINVAL;
4085 	}
4086 
4087 	if (spi + 1 > bpf_in_stack_arg_cnt(subprog)) {
4088 		verbose(env, "invalid read from stack arg off %d depth %d\n",
4089 			off, bpf_in_stack_arg_cnt(subprog) * BPF_REG_SIZE);
4090 		return -EACCES;
4091 	}
4092 
4093 	caller = vstate->frame[vstate->curframe - 1];
4094 	arg = &caller->stack_arg_regs[spi];
4095 	cur = vstate->frame[vstate->curframe];
4096 	cur->regs[dst_regno] = *arg;
4097 	return bpf_push_jmp_history(env, env->cur_state,
4098 				    INSN_F_STACK_ARG_ACCESS, spi, 0, 0);
4099 }
4100 
4101 static int mark_stack_arg_precision(struct bpf_verifier_env *env, int arg_idx)
4102 {
4103 	struct bpf_func_state *caller = cur_func(env);
4104 	int spi = arg_idx - MAX_BPF_FUNC_REG_ARGS;
4105 
4106 	bt_set_frame_stack_arg_slot(&env->bt, caller->frameno, spi);
4107 	return mark_chain_precision_batch(env, env->cur_state);
4108 }
4109 
4110 static int check_outgoing_stack_args(struct bpf_verifier_env *env, struct bpf_func_state *caller,
4111 				     int nargs)
4112 {
4113 	int i, spi;
4114 
4115 	for (i = MAX_BPF_FUNC_REG_ARGS; i < nargs; i++) {
4116 		spi = i - MAX_BPF_FUNC_REG_ARGS;
4117 		if (spi >= caller->out_stack_arg_cnt ||
4118 		    caller->stack_arg_regs[spi].type == NOT_INIT) {
4119 			verbose(env, "callee expects %d args, stack arg%d is not initialized\n",
4120 				nargs, spi + 1);
4121 			return -EFAULT;
4122 		}
4123 	}
4124 
4125 	return 0;
4126 }
4127 
4128 static struct bpf_reg_state *get_func_arg_reg(struct bpf_func_state *caller,
4129 					      struct bpf_reg_state *regs, int arg)
4130 {
4131 	if (arg < MAX_BPF_FUNC_REG_ARGS)
4132 		return &regs[arg + 1];
4133 
4134 	return &caller->stack_arg_regs[arg - MAX_BPF_FUNC_REG_ARGS];
4135 }
4136 
4137 static int check_map_access_type(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
4138 				 int off, int size, enum bpf_access_type type)
4139 {
4140 	struct bpf_map *map = reg->map_ptr;
4141 	u32 cap = bpf_map_flags_to_cap(map);
4142 
4143 	if (type == BPF_WRITE && !(cap & BPF_MAP_CAN_WRITE)) {
4144 		verbose(env, "write into map forbidden, value_size=%d off=%lld size=%d\n",
4145 			map->value_size, reg_smin(reg) + off, size);
4146 		return -EACCES;
4147 	}
4148 
4149 	if (type == BPF_READ && !(cap & BPF_MAP_CAN_READ)) {
4150 		verbose(env, "read from map forbidden, value_size=%d off=%lld size=%d\n",
4151 			map->value_size, reg_smin(reg) + off, size);
4152 		return -EACCES;
4153 	}
4154 
4155 	return 0;
4156 }
4157 
4158 /* check read/write into memory region (e.g., map value, ringbuf sample, etc) */
4159 static int __check_mem_access(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno,
4160 			      int off, int size, u32 mem_size,
4161 			      bool zero_size_allowed)
4162 {
4163 	bool size_ok = size > 0 || (size == 0 && zero_size_allowed);
4164 
4165 	if (off >= 0 && size_ok && (u64)off + size <= mem_size)
4166 		return 0;
4167 
4168 	switch (reg->type) {
4169 	case PTR_TO_MAP_KEY:
4170 		verbose(env, "invalid access to map key, key_size=%d off=%d size=%d\n",
4171 			mem_size, off, size);
4172 		break;
4173 	case PTR_TO_MAP_VALUE:
4174 		verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n",
4175 			mem_size, off, size);
4176 		break;
4177 	case PTR_TO_PACKET:
4178 	case PTR_TO_PACKET_META:
4179 	case PTR_TO_PACKET_END:
4180 		verbose(env, "invalid access to packet, off=%d size=%d, %s(id=%d,off=%d,r=%d)\n",
4181 			off, size, reg_arg_name(env, argno), reg->id, off, mem_size);
4182 		break;
4183 	case PTR_TO_CTX:
4184 		verbose(env, "invalid access to context, ctx_size=%d off=%d size=%d\n",
4185 			mem_size, off, size);
4186 		break;
4187 	case PTR_TO_MEM:
4188 	default:
4189 		verbose(env, "invalid access to memory, mem_size=%u off=%d size=%d\n",
4190 			mem_size, off, size);
4191 	}
4192 
4193 	return -EACCES;
4194 }
4195 
4196 /* check read/write into a memory region with possible variable offset */
4197 static int check_mem_region_access(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno,
4198 				   int off, int size, u32 mem_size,
4199 				   bool zero_size_allowed)
4200 {
4201 	int err;
4202 
4203 	/* We may have adjusted the register pointing to memory region, so we
4204 	 * need to try adding each of min_value and max_value to off
4205 	 * to make sure our theoretical access will be safe.
4206 	 *
4207 	 * The minimum value is only important with signed
4208 	 * comparisons where we can't assume the floor of a
4209 	 * value is 0.  If we are using signed variables for our
4210 	 * index'es we need to make sure that whatever we use
4211 	 * will have a set floor within our range.
4212 	 */
4213 	if (reg_smin(reg) < 0 &&
4214 	    (reg_smin(reg) == S64_MIN ||
4215 	     (off + reg_smin(reg) != (s64)(s32)(off + reg_smin(reg))) ||
4216 	      reg_smin(reg) + off < 0)) {
4217 		verbose(env, "%s min value is negative, either use unsigned index or do a if (index >=0) check.\n",
4218 			reg_arg_name(env, argno));
4219 		return -EACCES;
4220 	}
4221 	err = __check_mem_access(env, reg, argno, reg_smin(reg) + off, size,
4222 				 mem_size, zero_size_allowed);
4223 	if (err) {
4224 		verbose(env, "%s min value is outside of the allowed memory range\n",
4225 			reg_arg_name(env, argno));
4226 		return err;
4227 	}
4228 
4229 	/* If we haven't set a max value then we need to bail since we can't be
4230 	 * sure we won't do bad things.
4231 	 * If reg_umax(reg) + off could overflow, treat that as unbounded too.
4232 	 */
4233 	if (reg_umax(reg) >= BPF_MAX_VAR_OFF) {
4234 		verbose(env, "%s unbounded memory access, make sure to bounds check any such access\n",
4235 			reg_arg_name(env, argno));
4236 		return -EACCES;
4237 	}
4238 	err = __check_mem_access(env, reg, argno, reg_umax(reg) + off, size,
4239 				 mem_size, zero_size_allowed);
4240 	if (err) {
4241 		verbose(env, "%s max value is outside of the allowed memory range\n",
4242 			reg_arg_name(env, argno));
4243 		return err;
4244 	}
4245 
4246 	return 0;
4247 }
4248 
4249 static int __check_ptr_off_reg(struct bpf_verifier_env *env,
4250 			       const struct bpf_reg_state *reg, argno_t argno,
4251 			       bool fixed_off_ok)
4252 {
4253 	/* Access to this pointer-typed register or passing it to a helper
4254 	 * is only allowed in its original, unmodified form.
4255 	 */
4256 
4257 	if (!tnum_is_const(reg->var_off)) {
4258 		char tn_buf[48];
4259 
4260 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4261 		verbose(env, "variable %s access var_off=%s disallowed\n",
4262 			reg_type_str(env, reg->type), tn_buf);
4263 		return -EACCES;
4264 	}
4265 
4266 	if (reg_smin(reg) < 0) {
4267 		verbose(env, "negative offset %s ptr %s off=%lld disallowed\n",
4268 			reg_type_str(env, reg->type), reg_arg_name(env, argno), reg->var_off.value);
4269 		return -EACCES;
4270 	}
4271 
4272 	if (!fixed_off_ok && reg->var_off.value != 0) {
4273 		verbose(env, "dereference of modified %s ptr %s off=%lld disallowed\n",
4274 			reg_type_str(env, reg->type), reg_arg_name(env, argno), reg->var_off.value);
4275 		return -EACCES;
4276 	}
4277 
4278 	return 0;
4279 }
4280 
4281 static int check_ptr_off_reg(struct bpf_verifier_env *env,
4282 		             const struct bpf_reg_state *reg, int regno)
4283 {
4284 	return __check_ptr_off_reg(env, reg, argno_from_reg(regno), false);
4285 }
4286 
4287 static int map_kptr_match_type(struct bpf_verifier_env *env,
4288 			       struct btf_field *kptr_field,
4289 			       struct bpf_reg_state *reg, u32 regno)
4290 {
4291 	const char *targ_name = btf_type_name(kptr_field->kptr.btf, kptr_field->kptr.btf_id);
4292 	int perm_flags;
4293 	const char *reg_name = "";
4294 
4295 	if (base_type(reg->type) != PTR_TO_BTF_ID)
4296 		goto bad_type;
4297 
4298 	if (btf_is_kernel(reg->btf)) {
4299 		perm_flags = PTR_MAYBE_NULL | PTR_TRUSTED | MEM_RCU;
4300 
4301 		/* Only unreferenced case accepts untrusted pointers */
4302 		if (kptr_field->type == BPF_KPTR_UNREF)
4303 			perm_flags |= PTR_UNTRUSTED;
4304 	} else {
4305 		perm_flags = PTR_MAYBE_NULL | MEM_ALLOC;
4306 		if (kptr_field->type == BPF_KPTR_PERCPU)
4307 			perm_flags |= MEM_PERCPU;
4308 	}
4309 
4310 	if (type_flag(reg->type) & ~perm_flags)
4311 		goto bad_type;
4312 
4313 	/* We need to verify reg->type and reg->btf, before accessing reg->btf */
4314 	reg_name = btf_type_name(reg->btf, reg->btf_id);
4315 
4316 	/* For ref_ptr case, release function check should ensure we get one
4317 	 * referenced PTR_TO_BTF_ID, and that its fixed offset is 0. For the
4318 	 * normal store of unreferenced kptr, we must ensure var_off is zero.
4319 	 * Since ref_ptr cannot be accessed directly by BPF insns, check for
4320 	 * reg->id is not needed here.
4321 	 */
4322 	if (__check_ptr_off_reg(env, reg, argno_from_reg(regno), true))
4323 		return -EACCES;
4324 
4325 	/* A full type match is needed, as BTF can be vmlinux, module or prog BTF, and
4326 	 * we also need to take into account the reg->var_off.
4327 	 *
4328 	 * We want to support cases like:
4329 	 *
4330 	 * struct foo {
4331 	 *         struct bar br;
4332 	 *         struct baz bz;
4333 	 * };
4334 	 *
4335 	 * struct foo *v;
4336 	 * v = func();	      // PTR_TO_BTF_ID
4337 	 * val->foo = v;      // reg->var_off is zero, btf and btf_id match type
4338 	 * val->bar = &v->br; // reg->var_off is still zero, but we need to retry with
4339 	 *                    // first member type of struct after comparison fails
4340 	 * val->baz = &v->bz; // reg->var_off is non-zero, so struct needs to be walked
4341 	 *                    // to match type
4342 	 *
4343 	 * In the kptr_ref case, check_func_arg_reg_off already ensures reg->var_off
4344 	 * is zero. We must also ensure that btf_struct_ids_match does not walk
4345 	 * the struct to match type against first member of struct, i.e. reject
4346 	 * second case from above. Hence, when type is BPF_KPTR_REF, we set
4347 	 * strict mode to true for type match.
4348 	 */
4349 	if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->var_off.value,
4350 				  kptr_field->kptr.btf, kptr_field->kptr.btf_id,
4351 				  kptr_field->type != BPF_KPTR_UNREF))
4352 		goto bad_type;
4353 	return 0;
4354 bad_type:
4355 	verbose(env, "invalid kptr access, R%d type=%s%s ", regno,
4356 		reg_type_str(env, reg->type), reg_name);
4357 	verbose(env, "expected=%s%s", reg_type_str(env, PTR_TO_BTF_ID), targ_name);
4358 	if (kptr_field->type == BPF_KPTR_UNREF)
4359 		verbose(env, " or %s%s\n", reg_type_str(env, PTR_TO_BTF_ID | PTR_UNTRUSTED),
4360 			targ_name);
4361 	else
4362 		verbose(env, "\n");
4363 	return -EINVAL;
4364 }
4365 
4366 static bool in_sleepable(struct bpf_verifier_env *env)
4367 {
4368 	return env->cur_state->in_sleepable;
4369 }
4370 
4371 /* The non-sleepable programs and sleepable programs with explicit bpf_rcu_read_lock()
4372  * can dereference RCU protected pointers and result is PTR_TRUSTED.
4373  */
4374 static bool in_rcu_cs(struct bpf_verifier_env *env)
4375 {
4376 	return env->cur_state->active_rcu_locks ||
4377 	       env->cur_state->active_locks ||
4378 	       !in_sleepable(env);
4379 }
4380 
/* BTF ids of the kernel struct types that rcu_protected_object() reports as
 * RCU protected. Most entries are only present when the config option
 * guarding them is enabled.
 *
 * Once GCC supports btf_type_tag the following mechanism will be replaced with tag check
 */
BTF_SET_START(rcu_protected_types)
#ifdef CONFIG_NET
BTF_ID(struct, prog_test_ref_kfunc)
#endif
#ifdef CONFIG_CGROUPS
BTF_ID(struct, cgroup)
#endif
#ifdef CONFIG_BPF_JIT
BTF_ID(struct, bpf_cpumask)
#endif
BTF_ID(struct, task_struct)
#ifdef CONFIG_CRYPTO
BTF_ID(struct, bpf_crypto_ctx)
#endif
BTF_SET_END(rcu_protected_types)
4397 
4398 static bool rcu_protected_object(const struct btf *btf, u32 btf_id)
4399 {
4400 	if (!btf_is_kernel(btf))
4401 		return true;
4402 	return btf_id_set_contains(&rcu_protected_types, btf_id);
4403 }
4404 
4405 static struct btf_record *kptr_pointee_btf_record(struct btf_field *kptr_field)
4406 {
4407 	struct btf_struct_meta *meta;
4408 
4409 	if (btf_is_kernel(kptr_field->kptr.btf))
4410 		return NULL;
4411 
4412 	meta = btf_find_struct_meta(kptr_field->kptr.btf,
4413 				    kptr_field->kptr.btf_id);
4414 
4415 	return meta ? meta->record : NULL;
4416 }
4417 
4418 static bool rcu_safe_kptr(const struct btf_field *field)
4419 {
4420 	const struct btf_field_kptr *kptr = &field->kptr;
4421 
4422 	return field->type == BPF_KPTR_PERCPU ||
4423 	       (field->type == BPF_KPTR_REF && rcu_protected_object(kptr->btf, kptr->btf_id));
4424 }
4425 
4426 static u32 btf_ld_kptr_type(struct bpf_verifier_env *env, struct btf_field *kptr_field)
4427 {
4428 	struct btf_record *rec;
4429 	u32 ret;
4430 
4431 	ret = PTR_MAYBE_NULL;
4432 	if (rcu_safe_kptr(kptr_field) && in_rcu_cs(env)) {
4433 		ret |= MEM_RCU;
4434 		if (kptr_field->type == BPF_KPTR_PERCPU)
4435 			ret |= MEM_PERCPU;
4436 		else if (!btf_is_kernel(kptr_field->kptr.btf))
4437 			ret |= MEM_ALLOC;
4438 
4439 		rec = kptr_pointee_btf_record(kptr_field);
4440 		if (rec && btf_record_has_field(rec, BPF_GRAPH_NODE))
4441 			ret |= NON_OWN_REF;
4442 	} else {
4443 		ret |= PTR_UNTRUSTED;
4444 	}
4445 
4446 	return ret;
4447 }
4448 
4449 static int mark_uptr_ld_reg(struct bpf_verifier_env *env, u32 regno,
4450 			    struct btf_field *field)
4451 {
4452 	struct bpf_reg_state *reg;
4453 	const struct btf_type *t;
4454 
4455 	t = btf_type_by_id(field->kptr.btf, field->kptr.btf_id);
4456 	mark_reg_known_zero(env, cur_regs(env), regno);
4457 	reg = reg_state(env, regno);
4458 	reg->type = PTR_TO_MEM | PTR_MAYBE_NULL;
4459 	reg->mem_size = t->size;
4460 	reg->id = ++env->id_gen;
4461 
4462 	return 0;
4463 }
4464 
4465 static int check_map_kptr_access(struct bpf_verifier_env *env,
4466 				 int value_regno, int insn_idx,
4467 				 struct btf_field *kptr_field)
4468 {
4469 	struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
4470 	int class = BPF_CLASS(insn->code);
4471 	struct bpf_reg_state *val_reg;
4472 	int ret;
4473 
4474 	/* Things we already checked for in check_map_access and caller:
4475 	 *  - Reject cases where variable offset may touch kptr
4476 	 *  - size of access (must be BPF_DW)
4477 	 *  - tnum_is_const(reg->var_off)
4478 	 *  - kptr_field->offset == off + reg->var_off.value
4479 	 */
4480 	/* Only BPF_[LDX,STX,ST] | BPF_MEM | BPF_DW is supported */
4481 	if (BPF_MODE(insn->code) != BPF_MEM) {
4482 		verbose(env, "kptr in map can only be accessed using BPF_MEM instruction mode\n");
4483 		return -EACCES;
4484 	}
4485 
4486 	/* We only allow loading referenced kptr, since it will be marked as
4487 	 * untrusted, similar to unreferenced kptr.
4488 	 */
4489 	if (class != BPF_LDX &&
4490 	    (kptr_field->type == BPF_KPTR_REF || kptr_field->type == BPF_KPTR_PERCPU)) {
4491 		verbose(env, "store to referenced kptr disallowed\n");
4492 		return -EACCES;
4493 	}
4494 	if (class != BPF_LDX && kptr_field->type == BPF_UPTR) {
4495 		verbose(env, "store to uptr disallowed\n");
4496 		return -EACCES;
4497 	}
4498 
4499 	if (class == BPF_LDX) {
4500 		if (kptr_field->type == BPF_UPTR)
4501 			return mark_uptr_ld_reg(env, value_regno, kptr_field);
4502 
4503 		/* We can simply mark the value_regno receiving the pointer
4504 		 * value from map as PTR_TO_BTF_ID, with the correct type.
4505 		 */
4506 		ret = mark_btf_ld_reg(env, cur_regs(env), value_regno, PTR_TO_BTF_ID,
4507 				      kptr_field->kptr.btf, kptr_field->kptr.btf_id,
4508 				      btf_ld_kptr_type(env, kptr_field));
4509 		if (ret < 0)
4510 			return ret;
4511 	} else if (class == BPF_STX) {
4512 		val_reg = reg_state(env, value_regno);
4513 		if (!bpf_register_is_null(val_reg) &&
4514 		    map_kptr_match_type(env, kptr_field, val_reg, value_regno))
4515 			return -EACCES;
4516 	} else if (class == BPF_ST) {
4517 		if (insn->imm) {
4518 			verbose(env, "BPF_ST imm must be 0 when storing to kptr at off=%u\n",
4519 				kptr_field->offset);
4520 			return -EACCES;
4521 		}
4522 	} else {
4523 		verbose(env, "kptr in map can only be accessed using BPF_LDX/BPF_STX/BPF_ST\n");
4524 		return -EACCES;
4525 	}
4526 	return 0;
4527 }
4528 
4529 /*
4530  * Return the size of the memory region accessible from a pointer to map value.
4531  * For INSN_ARRAY maps whole bpf_insn_array->ips array is accessible.
4532  */
4533 static u32 map_mem_size(const struct bpf_map *map)
4534 {
4535 	if (map->map_type == BPF_MAP_TYPE_INSN_ARRAY)
4536 		return map->max_entries * sizeof(long);
4537 
4538 	return map->value_size;
4539 }
4540 
4541 /* check read/write into a map element with possible variable offset */
4542 static int check_map_access(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno,
4543 			    int off, int size, bool zero_size_allowed,
4544 			    enum bpf_access_src src)
4545 {
4546 	struct bpf_map *map = reg->map_ptr;
4547 	u32 mem_size = map_mem_size(map);
4548 	struct btf_record *rec;
4549 	int err, i;
4550 
4551 	err = check_mem_region_access(env, reg, argno, off, size, mem_size, zero_size_allowed);
4552 	if (err)
4553 		return err;
4554 
4555 	if (IS_ERR_OR_NULL(map->record))
4556 		return 0;
4557 	rec = map->record;
4558 	for (i = 0; i < rec->cnt; i++) {
4559 		struct btf_field *field = &rec->fields[i];
4560 		u32 p = field->offset;
4561 
4562 		/* If any part of a field  can be touched by load/store, reject
4563 		 * this program. To check that [x1, x2) overlaps with [y1, y2),
4564 		 * it is sufficient to check x1 < y2 && y1 < x2.
4565 		 */
4566 		if (reg_smin(reg) + off < p + field->size &&
4567 		    p < reg_umax(reg) + off + size) {
4568 			switch (field->type) {
4569 			case BPF_KPTR_UNREF:
4570 			case BPF_KPTR_REF:
4571 			case BPF_KPTR_PERCPU:
4572 			case BPF_UPTR:
4573 				if (src != ACCESS_DIRECT) {
4574 					verbose(env, "%s cannot be accessed indirectly by helper\n",
4575 						btf_field_type_name(field->type));
4576 					return -EACCES;
4577 				}
4578 				if (!tnum_is_const(reg->var_off)) {
4579 					verbose(env, "%s access cannot have variable offset\n",
4580 						btf_field_type_name(field->type));
4581 					return -EACCES;
4582 				}
4583 				if (p != off + reg->var_off.value) {
4584 					verbose(env, "%s access misaligned expected=%u off=%llu\n",
4585 						btf_field_type_name(field->type),
4586 						p, off + reg->var_off.value);
4587 					return -EACCES;
4588 				}
4589 				if (size != bpf_size_to_bytes(BPF_DW)) {
4590 					verbose(env, "%s access size must be BPF_DW\n",
4591 						btf_field_type_name(field->type));
4592 					return -EACCES;
4593 				}
4594 				break;
4595 			default:
4596 				verbose(env, "%s cannot be accessed directly by load/store\n",
4597 					btf_field_type_name(field->type));
4598 				return -EACCES;
4599 			}
4600 		}
4601 	}
4602 	return 0;
4603 }
4604 
4605 static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
4606 			       const struct bpf_call_arg_meta *meta,
4607 			       enum bpf_access_type t)
4608 {
4609 	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
4610 
4611 	switch (prog_type) {
4612 	/* Program types only with direct read access go here! */
4613 	case BPF_PROG_TYPE_LWT_IN:
4614 	case BPF_PROG_TYPE_LWT_OUT:
4615 	case BPF_PROG_TYPE_LWT_SEG6LOCAL:
4616 	case BPF_PROG_TYPE_SK_REUSEPORT:
4617 	case BPF_PROG_TYPE_FLOW_DISSECTOR:
4618 	case BPF_PROG_TYPE_CGROUP_SKB:
4619 		if (t == BPF_WRITE)
4620 			return false;
4621 		fallthrough;
4622 
4623 	/* Program types with direct read + write access go here! */
4624 	case BPF_PROG_TYPE_SCHED_CLS:
4625 	case BPF_PROG_TYPE_SCHED_ACT:
4626 	case BPF_PROG_TYPE_XDP:
4627 	case BPF_PROG_TYPE_LWT_XMIT:
4628 	case BPF_PROG_TYPE_SK_SKB:
4629 	case BPF_PROG_TYPE_SK_MSG:
4630 		if (meta)
4631 			return meta->pkt_access;
4632 
4633 		env->seen_direct_write = true;
4634 		return true;
4635 
4636 	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
4637 		if (t == BPF_WRITE)
4638 			env->seen_direct_write = true;
4639 
4640 		return true;
4641 
4642 	default:
4643 		return false;
4644 	}
4645 }
4646 
4647 static int check_packet_access(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno, int off,
4648 			       int size, bool zero_size_allowed)
4649 {
4650 	int err;
4651 
4652 	if (reg->range < 0) {
4653 		verbose(env, "%s offset is outside of the packet\n", reg_arg_name(env, argno));
4654 		return -EINVAL;
4655 	}
4656 
4657 	err = check_mem_region_access(env, reg, argno, off, size, reg->range, zero_size_allowed);
4658 	if (err)
4659 		return err;
4660 
4661 	/* __check_mem_access has made sure "off + size - 1" is within u16.
4662 	 * reg_umax(reg) can't be bigger than MAX_PACKET_OFF which is 0xffff,
4663 	 * otherwise find_good_pkt_pointers would have refused to set range info
4664 	 * that __check_mem_access would have rejected this pkt access.
4665 	 * Therefore, "off + reg_umax(reg) + size - 1" won't overflow u32.
4666 	 */
4667 	env->prog->aux->max_pkt_offset =
4668 		max_t(u32, env->prog->aux->max_pkt_offset,
4669 		      off + reg_umax(reg) + size - 1);
4670 
4671 	return 0;
4672 }
4673 
4674 static bool is_var_ctx_off_allowed(struct bpf_prog *prog)
4675 {
4676 	return resolve_prog_type(prog) == BPF_PROG_TYPE_SYSCALL;
4677 }
4678 
4679 /* check access to 'struct bpf_context' fields.  Supports fixed offsets only */
4680 static int __check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
4681 			      enum bpf_access_type t, struct bpf_insn_access_aux *info)
4682 {
4683 	if (env->ops->is_valid_access &&
4684 	    env->ops->is_valid_access(off, size, t, env->prog, info)) {
4685 		/* A non zero info.ctx_field_size indicates that this field is a
4686 		 * candidate for later verifier transformation to load the whole
4687 		 * field and then apply a mask when accessed with a narrower
4688 		 * access than actual ctx access size. A zero info.ctx_field_size
4689 		 * will only allow for whole field access and rejects any other
4690 		 * type of narrower access.
4691 		 */
4692 		if (base_type(info->reg_type) == PTR_TO_BTF_ID) {
4693 			if (info->ref_id &&
4694 			    !find_reference_state(env->cur_state, info->ref_id)) {
4695 				verbose(env, "invalid bpf_context access off=%d. Reference may already be released\n",
4696 					off);
4697 				return -EACCES;
4698 			}
4699 		} else {
4700 			env->insn_aux_data[insn_idx].ctx_field_size = info->ctx_field_size;
4701 		}
4702 		/* remember the offset of last byte accessed in ctx */
4703 		if (env->prog->aux->max_ctx_offset < off + size)
4704 			env->prog->aux->max_ctx_offset = off + size;
4705 		return 0;
4706 	}
4707 
4708 	verbose(env, "invalid bpf_context access off=%d size=%d\n", off, size);
4709 	return -EACCES;
4710 }
4711 
4712 static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, struct bpf_reg_state *reg, argno_t argno,
4713 			    int off, int access_size, enum bpf_access_type t,
4714 			    struct bpf_insn_access_aux *info)
4715 {
4716 	/*
4717 	 * Program types that don't rewrite ctx accesses can safely
4718 	 * dereference ctx pointers with fixed offsets.
4719 	 */
4720 	bool var_off_ok = is_var_ctx_off_allowed(env->prog);
4721 	bool fixed_off_ok = !env->ops->convert_ctx_access;
4722 	int err;
4723 
4724 	if (var_off_ok)
4725 		err = check_mem_region_access(env, reg, argno, off, access_size, U16_MAX, false);
4726 	else
4727 		err = __check_ptr_off_reg(env, reg, argno, fixed_off_ok);
4728 	if (err)
4729 		return err;
4730 	off += reg_umax(reg);
4731 
4732 	err = __check_ctx_access(env, insn_idx, off, access_size, t, info);
4733 	if (err)
4734 		verbose_linfo(env, insn_idx, "; ");
4735 	return err;
4736 }
4737 
4738 static int check_flow_keys_access(struct bpf_verifier_env *env,
4739 				  struct bpf_reg_state *reg, argno_t argno,
4740 				  int off, int size)
4741 {
4742 	/* Only a constant offset is allowed here; fold it into off. */
4743 	if (!tnum_is_const(reg->var_off)) {
4744 		char tn_buf[48];
4745 
4746 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4747 		verbose(env, "%s invalid variable offset to flow keys: off=%d, var_off=%s\n",
4748 			reg_arg_name(env, argno), off, tn_buf);
4749 		return -EACCES;
4750 	}
4751 	off += reg->var_off.value;
4752 
4753 	if (size < 0 || off < 0 ||
4754 	    (u64)off + size > sizeof(struct bpf_flow_keys)) {
4755 		verbose(env, "invalid access to flow keys off=%d size=%d\n",
4756 			off, size);
4757 		return -EACCES;
4758 	}
4759 	return 0;
4760 }
4761 
4762 static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
4763 			     struct bpf_reg_state *reg, argno_t argno, int off, int size,
4764 			     enum bpf_access_type t)
4765 {
4766 	struct bpf_insn_access_aux info = {};
4767 	bool valid;
4768 
4769 	if (reg_smin(reg) < 0) {
4770 		verbose(env, "%s min value is negative, either use unsigned index or do a if (index >=0) check.\n",
4771 			reg_arg_name(env, argno));
4772 		return -EACCES;
4773 	}
4774 
4775 	switch (reg->type) {
4776 	case PTR_TO_SOCK_COMMON:
4777 		valid = bpf_sock_common_is_valid_access(off, size, t, &info);
4778 		break;
4779 	case PTR_TO_SOCKET:
4780 		valid = bpf_sock_is_valid_access(off, size, t, &info);
4781 		break;
4782 	case PTR_TO_TCP_SOCK:
4783 		valid = bpf_tcp_sock_is_valid_access(off, size, t, &info);
4784 		break;
4785 	case PTR_TO_XDP_SOCK:
4786 		valid = bpf_xdp_sock_is_valid_access(off, size, t, &info);
4787 		break;
4788 	default:
4789 		valid = false;
4790 	}
4791 
4792 
4793 	if (valid) {
4794 		env->insn_aux_data[insn_idx].ctx_field_size =
4795 			info.ctx_field_size;
4796 		return 0;
4797 	}
4798 
4799 	verbose(env, "%s invalid %s access off=%d size=%d\n",
4800 		reg_arg_name(env, argno), reg_type_str(env, reg->type), off, size);
4801 
4802 	return -EACCES;
4803 }
4804 
4805 static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
4806 {
4807 	return __is_pointer_value(env->allow_ptr_leaks, reg_state(env, regno));
4808 }
4809 
4810 static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
4811 {
4812 	const struct bpf_reg_state *reg = reg_state(env, regno);
4813 
4814 	return reg->type == PTR_TO_CTX;
4815 }
4816 
4817 static bool is_sk_reg(struct bpf_verifier_env *env, int regno)
4818 {
4819 	const struct bpf_reg_state *reg = reg_state(env, regno);
4820 
4821 	return type_is_sk_pointer(reg->type);
4822 }
4823 
4824 static bool is_pkt_reg(struct bpf_verifier_env *env, int regno)
4825 {
4826 	const struct bpf_reg_state *reg = reg_state(env, regno);
4827 
4828 	return type_is_pkt_pointer(reg->type);
4829 }
4830 
4831 static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno)
4832 {
4833 	const struct bpf_reg_state *reg = reg_state(env, regno);
4834 
4835 	/* Separate to is_ctx_reg() since we still want to allow BPF_ST here. */
4836 	return reg->type == PTR_TO_FLOW_KEYS;
4837 }
4838 
4839 static bool is_arena_reg(struct bpf_verifier_env *env, int regno)
4840 {
4841 	const struct bpf_reg_state *reg = reg_state(env, regno);
4842 
4843 	return reg->type == PTR_TO_ARENA;
4844 }
4845 
4846 /* Return false if @regno contains a pointer whose type isn't supported for
4847  * atomic instruction @insn.
4848  */
4849 static bool atomic_ptr_type_ok(struct bpf_verifier_env *env, int regno,
4850 			       struct bpf_insn *insn)
4851 {
4852 	if (is_ctx_reg(env, regno))
4853 		return false;
4854 	if (is_pkt_reg(env, regno))
4855 		return false;
4856 	if (is_flow_key_reg(env, regno))
4857 		return false;
4858 	if (is_sk_reg(env, regno))
4859 		return false;
4860 	if (is_arena_reg(env, regno))
4861 		return bpf_jit_supports_insn(insn, true);
4862 
4863 	return true;
4864 }
4865 
/* Lookup table from register type to a pointer to the matching BTF id.
 * is_trusted_reg() indexes it with base_type(reg->type); entries that are
 * not listed here are NULL. The socket entries exist only when CONFIG_NET
 * is enabled.
 */
static u32 *reg2btf_ids[__BPF_REG_TYPE_MAX] = {
#ifdef CONFIG_NET
	[PTR_TO_SOCKET] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK],
	[PTR_TO_SOCK_COMMON] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
	[PTR_TO_TCP_SOCK] = &btf_sock_ids[BTF_SOCK_TYPE_TCP],
#endif
	[CONST_PTR_TO_MAP] = btf_bpf_map_id,
};
4874 
4875 static bool is_trusted_reg(struct bpf_verifier_env *env, const struct bpf_reg_state *reg)
4876 {
4877 	/* A referenced register is always trusted. */
4878 	if (reg_is_referenced(env, reg))
4879 		return true;
4880 
4881 	/* Types listed in the reg2btf_ids are always trusted */
4882 	if (reg2btf_ids[base_type(reg->type)] &&
4883 	    !bpf_type_has_unsafe_modifiers(reg->type))
4884 		return true;
4885 
4886 	/* If a register is not referenced, it is trusted if it has the
4887 	 * MEM_ALLOC or PTR_TRUSTED type modifiers, and no others. Some of the
4888 	 * other type modifiers may be safe, but we elect to take an opt-in
4889 	 * approach here as some (e.g. PTR_UNTRUSTED and PTR_MAYBE_NULL) are
4890 	 * not.
4891 	 *
4892 	 * Eventually, we should make PTR_TRUSTED the single source of truth
4893 	 * for whether a register is trusted.
4894 	 */
4895 	return type_flag(reg->type) & BPF_REG_TRUSTED_MODIFIERS &&
4896 	       !bpf_type_has_unsafe_modifiers(reg->type);
4897 }
4898 
4899 static bool is_rcu_reg(const struct bpf_reg_state *reg)
4900 {
4901 	return reg->type & MEM_RCU;
4902 }
4903 
4904 static void clear_trusted_flags(enum bpf_type_flag *flag)
4905 {
4906 	*flag &= ~(BPF_REG_TRUSTED_MODIFIERS | MEM_RCU);
4907 }
4908 
4909 static int check_pkt_ptr_alignment(struct bpf_verifier_env *env,
4910 				   const struct bpf_reg_state *reg,
4911 				   int off, int size, bool strict)
4912 {
4913 	struct tnum reg_off;
4914 	int ip_align;
4915 
4916 	/* Byte size accesses are always allowed. */
4917 	if (!strict || size == 1)
4918 		return 0;
4919 
4920 	/* For platforms that do not have a Kconfig enabling
4921 	 * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the value of
4922 	 * NET_IP_ALIGN is universally set to '2'.  And on platforms
4923 	 * that do set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, we get
4924 	 * to this code only in strict mode where we want to emulate
4925 	 * the NET_IP_ALIGN==2 checking.  Therefore use an
4926 	 * unconditional IP align value of '2'.
4927 	 */
4928 	ip_align = 2;
4929 
4930 	reg_off = tnum_add(reg->var_off, tnum_const(ip_align + off));
4931 	if (!tnum_is_aligned(reg_off, size)) {
4932 		char tn_buf[48];
4933 
4934 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4935 		verbose(env,
4936 			"misaligned packet access off %d+%s+%d size %d\n",
4937 			ip_align, tn_buf, off, size);
4938 		return -EACCES;
4939 	}
4940 
4941 	return 0;
4942 }
4943 
4944 static int check_generic_ptr_alignment(struct bpf_verifier_env *env,
4945 				       const struct bpf_reg_state *reg,
4946 				       const char *pointer_desc,
4947 				       int off, int size, bool strict)
4948 {
4949 	struct tnum reg_off;
4950 
4951 	/* Byte size accesses are always allowed. */
4952 	if (!strict || size == 1)
4953 		return 0;
4954 
4955 	reg_off = tnum_add(reg->var_off, tnum_const(off));
4956 	if (!tnum_is_aligned(reg_off, size)) {
4957 		char tn_buf[48];
4958 
4959 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4960 		verbose(env, "misaligned %saccess off %s+%d size %d\n",
4961 			pointer_desc, tn_buf, off, size);
4962 		return -EACCES;
4963 	}
4964 
4965 	return 0;
4966 }
4967 
4968 static int check_ptr_alignment(struct bpf_verifier_env *env,
4969 			       const struct bpf_reg_state *reg, int off,
4970 			       int size, bool strict_alignment_once)
4971 {
4972 	bool strict = env->strict_alignment || strict_alignment_once;
4973 	const char *pointer_desc = "";
4974 
4975 	switch (reg->type) {
4976 	case PTR_TO_PACKET:
4977 	case PTR_TO_PACKET_META:
4978 		/* Special case, because of NET_IP_ALIGN. Given metadata sits
4979 		 * right in front, treat it the very same way.
4980 		 */
4981 		return check_pkt_ptr_alignment(env, reg, off, size, strict);
4982 	case PTR_TO_FLOW_KEYS:
4983 		pointer_desc = "flow keys ";
4984 		break;
4985 	case PTR_TO_MAP_KEY:
4986 		pointer_desc = "key ";
4987 		break;
4988 	case PTR_TO_MAP_VALUE:
4989 		pointer_desc = "value ";
4990 		if (reg->map_ptr->map_type == BPF_MAP_TYPE_INSN_ARRAY)
4991 			strict = true;
4992 		break;
4993 	case PTR_TO_CTX:
4994 		pointer_desc = "context ";
4995 		break;
4996 	case PTR_TO_STACK:
4997 		pointer_desc = "stack ";
4998 		/* The stack spill tracking logic in check_stack_write_fixed_off()
4999 		 * and check_stack_read_fixed_off() relies on stack accesses being
5000 		 * aligned.
5001 		 */
5002 		strict = true;
5003 		break;
5004 	case PTR_TO_SOCKET:
5005 		pointer_desc = "sock ";
5006 		break;
5007 	case PTR_TO_SOCK_COMMON:
5008 		pointer_desc = "sock_common ";
5009 		break;
5010 	case PTR_TO_TCP_SOCK:
5011 		pointer_desc = "tcp_sock ";
5012 		break;
5013 	case PTR_TO_XDP_SOCK:
5014 		pointer_desc = "xdp_sock ";
5015 		break;
5016 	case PTR_TO_ARENA:
5017 		return 0;
5018 	default:
5019 		break;
5020 	}
5021 	return check_generic_ptr_alignment(env, reg, pointer_desc, off, size,
5022 					   strict);
5023 }
5024 
5025 static enum priv_stack_mode bpf_enable_priv_stack(struct bpf_prog *prog)
5026 {
5027 	if (!bpf_jit_supports_private_stack())
5028 		return NO_PRIV_STACK;
5029 
5030 	/* bpf_prog_check_recur() checks all prog types that use bpf trampoline
5031 	 * while kprobe/tp/perf_event/raw_tp don't use trampoline hence checked
5032 	 * explicitly.
5033 	 */
5034 	switch (prog->type) {
5035 	case BPF_PROG_TYPE_KPROBE:
5036 	case BPF_PROG_TYPE_TRACEPOINT:
5037 	case BPF_PROG_TYPE_PERF_EVENT:
5038 	case BPF_PROG_TYPE_RAW_TRACEPOINT:
5039 		return PRIV_STACK_ADAPTIVE;
5040 	case BPF_PROG_TYPE_TRACING:
5041 	case BPF_PROG_TYPE_LSM:
5042 	case BPF_PROG_TYPE_STRUCT_OPS:
5043 		if (prog->aux->priv_stack_requested || bpf_prog_check_recur(prog))
5044 			return PRIV_STACK_ADAPTIVE;
5045 		fallthrough;
5046 	default:
5047 		break;
5048 	}
5049 
5050 	return NO_PRIV_STACK;
5051 }
5052 
5053 static int round_up_stack_depth(struct bpf_verifier_env *env, int stack_depth)
5054 {
5055 	if (env->prog->jit_requested)
5056 		return round_up(stack_depth, 16);
5057 
5058 	/* round up to 32-bytes, since this is granularity
5059 	 * of interpreter stack size
5060 	 */
5061 	return round_up(max_t(u32, stack_depth, 1), 32);
5062 }
5063 
/* temporary state used for call frame depth calculation.
 *
 * check_max_stack_depth() allocates one entry per subprog and
 * check_max_stack_depth_subprog() indexes the array by subprog idx, using
 * the entries as the saved "call stack" of its iterative walk.
 */
struct bpf_subprog_call_depth_info {
	int ret_insn; /* caller instruction where we return to. */
	int caller; /* caller subprogram idx; -1 when there is no caller */
	int frame; /* # of consecutive static call stack frames on top of stack */
};
5070 
5071 /* starting from main bpf function walk all instructions of the function
5072  * and recursively walk all callees that given function can call.
5073  * Ignore jump and exit insns.
5074  */
5075 static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx,
5076 					 struct bpf_subprog_call_depth_info *dinfo,
5077 					 bool priv_stack_supported)
5078 {
5079 	struct bpf_subprog_info *subprog = env->subprog_info;
5080 	struct bpf_insn *insn = env->prog->insnsi;
5081 	int depth = 0, frame = 0, i, subprog_end, subprog_depth;
5082 	bool tail_call_reachable = false;
5083 	int total;
5084 	int tmp;
5085 
5086 	/* no caller idx */
5087 	dinfo[idx].caller = -1;
5088 
5089 	i = subprog[idx].start;
5090 	if (!priv_stack_supported)
5091 		subprog[idx].priv_stack_mode = NO_PRIV_STACK;
5092 process_func:
5093 	/* protect against potential stack overflow that might happen when
5094 	 * bpf2bpf calls get combined with tailcalls. Limit the caller's stack
5095 	 * depth for such case down to 256 so that the worst case scenario
5096 	 * would result in 8k stack size (32 which is tailcall limit * 256 =
5097 	 * 8k).
5098 	 *
5099 	 * To get the idea what might happen, see an example:
5100 	 * func1 -> sub rsp, 128
5101 	 *  subfunc1 -> sub rsp, 256
5102 	 *  tailcall1 -> add rsp, 256
5103 	 *   func2 -> sub rsp, 192 (total stack size = 128 + 192 = 320)
5104 	 *   subfunc2 -> sub rsp, 64
5105 	 *   subfunc22 -> sub rsp, 128
5106 	 *   tailcall2 -> add rsp, 128
5107 	 *    func3 -> sub rsp, 32 (total stack size 128 + 192 + 64 + 32 = 416)
5108 	 *
5109 	 * tailcall will unwind the current stack frame but it will not get rid
5110 	 * of caller's stack as shown on the example above.
5111 	 */
5112 	if (idx && subprog[idx].has_tail_call && depth >= 256) {
5113 		verbose(env,
5114 			"tail_calls are not allowed when call stack of previous frames is %d bytes. Too large\n",
5115 			depth);
5116 		return -EACCES;
5117 	}
5118 
5119 	subprog_depth = round_up_stack_depth(env, subprog[idx].stack_depth);
5120 	if (IS_ENABLED(CONFIG_X86_64) && subprog[idx].stack_arg_cnt) {
5121 		/* x86-64 uses R9 for both private stack frame pointer and arg6. */
5122 		subprog[idx].priv_stack_mode = NO_PRIV_STACK;
5123 	} else if (priv_stack_supported) {
5124 		/* Request private stack support only if the subprog stack
5125 		 * depth is no less than BPF_PRIV_STACK_MIN_SIZE. This is to
5126 		 * avoid jit penalty if the stack usage is small.
5127 		 */
5128 		if (subprog[idx].priv_stack_mode == PRIV_STACK_UNKNOWN &&
5129 		    subprog_depth >= BPF_PRIV_STACK_MIN_SIZE)
5130 			subprog[idx].priv_stack_mode = PRIV_STACK_ADAPTIVE;
5131 	}
5132 
5133 	if (subprog[idx].priv_stack_mode == PRIV_STACK_ADAPTIVE) {
5134 		if (subprog_depth > env->max_stack_depth)
5135 			env->max_stack_depth = subprog_depth;
5136 		if (subprog_depth > MAX_BPF_STACK) {
5137 			verbose(env, "stack size of subprog %d is %d. Too large\n",
5138 				idx, subprog_depth);
5139 			return -EACCES;
5140 		}
5141 	} else {
5142 		depth += subprog_depth;
5143 		if (depth > env->max_stack_depth)
5144 			env->max_stack_depth = depth;
5145 		if (depth > MAX_BPF_STACK) {
5146 			total = 0;
5147 			for (tmp = idx; tmp >= 0; tmp = dinfo[tmp].caller)
5148 				total++;
5149 
5150 			verbose(env, "combined stack size of %d calls is %d. Too large\n",
5151 				total, depth);
5152 			return -EACCES;
5153 		}
5154 	}
5155 continue_func:
5156 	subprog_end = subprog[idx + 1].start;
5157 	for (; i < subprog_end; i++) {
5158 		int next_insn, sidx;
5159 
5160 		if (bpf_pseudo_kfunc_call(insn + i) && !insn[i].off) {
5161 			bool err = false;
5162 
5163 			if (!bpf_is_throw_kfunc(insn + i))
5164 				continue;
5165 			for (tmp = idx; tmp >= 0 && !err; tmp = dinfo[tmp].caller) {
5166 				if (subprog[tmp].is_cb) {
5167 					err = true;
5168 					break;
5169 				}
5170 			}
5171 			if (!err)
5172 				continue;
5173 			verbose(env,
5174 				"bpf_throw kfunc (insn %d) cannot be called from callback subprog %d\n",
5175 				i, idx);
5176 			return -EINVAL;
5177 		}
5178 
5179 		if (!bpf_pseudo_call(insn + i) && !bpf_pseudo_func(insn + i))
5180 			continue;
5181 		/* remember insn and function to return to */
5182 
5183 		/* find the callee */
5184 		next_insn = i + insn[i].imm + 1;
5185 		sidx = bpf_find_subprog(env, next_insn);
5186 		if (verifier_bug_if(sidx < 0, env, "callee not found at insn %d", next_insn))
5187 			return -EFAULT;
5188 		if (subprog[sidx].is_async_cb) {
5189 			if (subprog[sidx].has_tail_call) {
5190 				verifier_bug(env, "subprog has tail_call and async cb");
5191 				return -EFAULT;
5192 			}
5193 			/* async callbacks don't increase bpf prog stack size unless called directly */
5194 			if (!bpf_pseudo_call(insn + i))
5195 				continue;
5196 			if (subprog[sidx].is_exception_cb) {
5197 				verbose(env, "insn %d cannot call exception cb directly", i);
5198 				return -EINVAL;
5199 			}
5200 		}
5201 
5202 		/* store caller info for after we return from callee */
5203 		dinfo[idx].frame = frame;
5204 		dinfo[idx].ret_insn = i + 1;
5205 
5206 		/* push caller idx into callee's dinfo */
5207 		dinfo[sidx].caller = idx;
5208 
5209 		i = next_insn;
5210 
5211 		idx = sidx;
5212 		if (!priv_stack_supported)
5213 			subprog[idx].priv_stack_mode = NO_PRIV_STACK;
5214 
5215 		if (subprog[idx].has_tail_call)
5216 			tail_call_reachable = true;
5217 
5218 		frame = bpf_subprog_is_global(env, idx) ? 0 : frame + 1;
5219 		if (frame >= MAX_CALL_FRAMES) {
5220 			verbose(env, "the call stack of %d frames is too deep !\n",
5221 				frame);
5222 			return -E2BIG;
5223 		}
5224 		goto process_func;
5225 	}
5226 	/* if tail call got detected across bpf2bpf calls then mark each of the
5227 	 * currently present subprog frames as tail call reachable subprogs;
5228 	 * this info will be utilized by JIT so that we will be preserving the
5229 	 * tail call counter throughout bpf2bpf calls combined with tailcalls
5230 	 */
5231 	if (tail_call_reachable) {
5232 		for (tmp = idx; tmp >= 0; tmp = dinfo[tmp].caller) {
5233 			if (subprog[tmp].is_exception_cb) {
5234 				verbose(env, "cannot tail call within exception cb\n");
5235 				return -EINVAL;
5236 			}
5237 			if (subprog[tmp].stack_arg_cnt) {
5238 				verbose(env, "tail_calls are not allowed in programs with stack args\n");
5239 				return -EINVAL;
5240 			}
5241 			subprog[tmp].tail_call_reachable = true;
5242 		}
5243 	} else if (!idx && subprog[0].has_tail_call && subprog[0].stack_arg_cnt) {
5244 		verbose(env, "tail_calls are not allowed in programs with stack args\n");
5245 		return -EINVAL;
5246 	}
5247 
5248 	if (subprog[0].tail_call_reachable)
5249 		env->prog->aux->tail_call_reachable = true;
5250 
5251 	/* end of for() loop means the last insn of the 'subprog'
5252 	 * was reached. Doesn't matter whether it was JA or EXIT
5253 	 */
5254 	if (frame == 0 && dinfo[idx].caller < 0)
5255 		return 0;
5256 	if (subprog[idx].priv_stack_mode != PRIV_STACK_ADAPTIVE)
5257 		depth -= round_up_stack_depth(env, subprog[idx].stack_depth);
5258 
5259 	/* pop caller idx from callee */
5260 	idx = dinfo[idx].caller;
5261 
5262 	/* retrieve caller state from its frame */
5263 	frame = dinfo[idx].frame;
5264 	i = dinfo[idx].ret_insn;
5265 
5266 	/* reset tail_call_reachable to the parent's actual state */
5267 	tail_call_reachable = subprog[idx].tail_call_reachable;
5268 
5269 	goto continue_func;
5270 }
5271 
5272 static int check_max_stack_depth(struct bpf_verifier_env *env)
5273 {
5274 	enum priv_stack_mode priv_stack_mode = PRIV_STACK_UNKNOWN;
5275 	struct bpf_subprog_call_depth_info *dinfo;
5276 	struct bpf_subprog_info *si = env->subprog_info;
5277 	bool priv_stack_supported;
5278 	int ret;
5279 
5280 	dinfo = kvcalloc(env->subprog_cnt, sizeof(*dinfo), GFP_KERNEL_ACCOUNT);
5281 	if (!dinfo)
5282 		return -ENOMEM;
5283 
5284 	for (int i = 0; i < env->subprog_cnt; i++) {
5285 		if (si[i].has_tail_call) {
5286 			priv_stack_mode = NO_PRIV_STACK;
5287 			break;
5288 		}
5289 	}
5290 
5291 	if (priv_stack_mode == PRIV_STACK_UNKNOWN)
5292 		priv_stack_mode = bpf_enable_priv_stack(env->prog);
5293 
5294 	/* All async_cb subprogs use normal kernel stack. If a particular
5295 	 * subprog appears in both main prog and async_cb subtree, that
5296 	 * subprog will use normal kernel stack to avoid potential nesting.
5297 	 * The reverse subprog traversal ensures when main prog subtree is
5298 	 * checked, the subprogs appearing in async_cb subtrees are already
5299 	 * marked as using normal kernel stack, so stack size checking can
5300 	 * be done properly.
5301 	 */
5302 	for (int i = env->subprog_cnt - 1; i >= 0; i--) {
5303 		if (!i || si[i].is_async_cb) {
5304 			priv_stack_supported = !i && priv_stack_mode == PRIV_STACK_ADAPTIVE;
5305 			ret = check_max_stack_depth_subprog(env, i, dinfo,
5306 					priv_stack_supported);
5307 			if (ret < 0) {
5308 				kvfree(dinfo);
5309 				return ret;
5310 			}
5311 		}
5312 	}
5313 
5314 	for (int i = 0; i < env->subprog_cnt; i++) {
5315 		if (si[i].priv_stack_mode == PRIV_STACK_ADAPTIVE) {
5316 			env->prog->aux->jits_use_priv_stack = true;
5317 			break;
5318 		}
5319 	}
5320 
5321 	kvfree(dinfo);
5322 
5323 	return 0;
5324 }
5325 
5326 static int __check_buffer_access(struct bpf_verifier_env *env,
5327 				 const char *buf_info,
5328 				 const struct bpf_reg_state *reg,
5329 				 argno_t argno, int off, int size)
5330 {
5331 	if (off < 0) {
5332 		verbose(env,
5333 			"%s invalid %s buffer access: off=%d, size=%d\n",
5334 			reg_arg_name(env, argno), buf_info, off, size);
5335 		return -EACCES;
5336 	}
5337 	if (!tnum_is_const(reg->var_off)) {
5338 		char tn_buf[48];
5339 
5340 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
5341 		verbose(env,
5342 			"%s invalid variable buffer offset: off=%d, var_off=%s\n",
5343 			reg_arg_name(env, argno), off, tn_buf);
5344 		return -EACCES;
5345 	}
5346 
5347 	return 0;
5348 }
5349 
5350 static int check_tp_buffer_access(struct bpf_verifier_env *env,
5351 				  const struct bpf_reg_state *reg,
5352 				  argno_t argno, int off, int size)
5353 {
5354 	int err;
5355 
5356 	err = __check_buffer_access(env, "tracepoint", reg, argno, off, size);
5357 	if (err)
5358 		return err;
5359 
5360 	env->prog->aux->max_tp_access = max(reg->var_off.value + off + size,
5361 					    env->prog->aux->max_tp_access);
5362 
5363 	return 0;
5364 }
5365 
5366 static int check_buffer_access(struct bpf_verifier_env *env,
5367 			       const struct bpf_reg_state *reg,
5368 			       argno_t argno, int off, int size,
5369 			       bool zero_size_allowed,
5370 			       u32 *max_access)
5371 {
5372 	const char *buf_info = type_is_rdonly_mem(reg->type) ? "rdonly" : "rdwr";
5373 	int err;
5374 
5375 	err = __check_buffer_access(env, buf_info, reg, argno, off, size);
5376 	if (err)
5377 		return err;
5378 
5379 	*max_access = max(reg->var_off.value + off + size, *max_access);
5380 
5381 	return 0;
5382 }
5383 
5384 /* BPF architecture zero extends alu32 ops into 64-bit registesr */
5385 static void zext_32_to_64(struct bpf_reg_state *reg)
5386 {
5387 	reg->var_off = tnum_subreg(reg->var_off);
5388 	reg_set_urange64(reg, reg_u32_min(reg), reg_u32_max(reg));
5389 }
5390 
5391 /* truncate register to smaller size (in bytes)
5392  * must be called with size < BPF_REG_SIZE
5393  */
5394 static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
5395 {
5396 	u64 mask;
5397 
5398 	/* clear high bits in bit representation */
5399 	reg->var_off = tnum_cast(reg->var_off, size);
5400 
5401 	/* fix arithmetic bounds */
5402 	mask = ((u64)1 << (size * 8)) - 1;
5403 	if ((reg_umin(reg) & ~mask) == (reg_umax(reg) & ~mask))
5404 		reg_set_urange64(reg, reg_umin(reg) & mask, reg_umax(reg) & mask);
5405 	else
5406 		reg_set_urange64(reg, 0, mask);
5407 
5408 	/* If size is smaller than 32bit register the 32bit register
5409 	 * values are also truncated so we push 64-bit bounds into
5410 	 * 32-bit bounds. Above were truncated < 32-bits already.
5411 	 */
5412 	if (size < 4)
5413 		__mark_reg32_unbounded(reg);
5414 
5415 	reg_bounds_sync(reg);
5416 }
5417 
5418 static void set_sext64_default_val(struct bpf_reg_state *reg, int size)
5419 {
5420 	if (size == 1) {
5421 		reg_set_srange64(reg, S8_MIN, S8_MAX);
5422 		reg_set_srange32(reg, S8_MIN, S8_MAX);
5423 	} else if (size == 2) {
5424 		reg_set_srange64(reg, S16_MIN, S16_MAX);
5425 		reg_set_srange32(reg, S16_MIN, S16_MAX);
5426 	} else {
5427 		/* size == 4 */
5428 		reg_set_srange64(reg, S32_MIN, S32_MAX);
5429 		reg_set_srange32(reg, S32_MIN, S32_MAX);
5430 	}
5431 	reg->var_off = tnum_unknown;
5432 }
5433 
5434 static void coerce_reg_to_size_sx(struct bpf_reg_state *reg, int size)
5435 {
5436 	s64 init_s64_max, init_s64_min, s64_max, s64_min, u64_cval;
5437 	u64 top_smax_value, top_smin_value;
5438 	u64 num_bits = size * 8;
5439 
5440 	if (tnum_is_const(reg->var_off)) {
5441 		u64_cval = reg->var_off.value;
5442 		if (size == 1)
5443 			reg->var_off = tnum_const((s8)u64_cval);
5444 		else if (size == 2)
5445 			reg->var_off = tnum_const((s16)u64_cval);
5446 		else
5447 			/* size == 4 */
5448 			reg->var_off = tnum_const((s32)u64_cval);
5449 
5450 		u64_cval = reg->var_off.value;
5451 		reg->r64 = cnum64_from_urange(u64_cval, u64_cval);
5452 		reg->r32 = cnum32_from_urange((u32)u64_cval, (u32)u64_cval);
5453 		return;
5454 	}
5455 
5456 	top_smax_value = ((u64)reg_smax(reg) >> num_bits) << num_bits;
5457 	top_smin_value = ((u64)reg_smin(reg) >> num_bits) << num_bits;
5458 
5459 	if (top_smax_value != top_smin_value)
5460 		goto out;
5461 
5462 	/* find the s64_min and s64_min after sign extension */
5463 	if (size == 1) {
5464 		init_s64_max = (s8)reg_smax(reg);
5465 		init_s64_min = (s8)reg_smin(reg);
5466 	} else if (size == 2) {
5467 		init_s64_max = (s16)reg_smax(reg);
5468 		init_s64_min = (s16)reg_smin(reg);
5469 	} else {
5470 		init_s64_max = (s32)reg_smax(reg);
5471 		init_s64_min = (s32)reg_smin(reg);
5472 	}
5473 
5474 	s64_max = max(init_s64_max, init_s64_min);
5475 	s64_min = min(init_s64_max, init_s64_min);
5476 
5477 	/* both of s64_max/s64_min positive or negative */
5478 	if ((s64_max >= 0) == (s64_min >= 0)) {
5479 		reg_set_srange64(reg, s64_min, s64_max);
5480 		reg_set_srange32(reg, s64_min, s64_max);
5481 		reg->var_off = tnum_range(s64_min, s64_max);
5482 		return;
5483 	}
5484 
5485 out:
5486 	set_sext64_default_val(reg, size);
5487 }
5488 
5489 static void set_sext32_default_val(struct bpf_reg_state *reg, int size)
5490 {
5491 	if (size == 1)
5492 		reg_set_srange32(reg, S8_MIN, S8_MAX);
5493 	else
5494 		/* size == 2 */
5495 		reg_set_srange32(reg, S16_MIN, S16_MAX);
5496 	reg->var_off = tnum_subreg(tnum_unknown);
5497 }
5498 
5499 static void coerce_subreg_to_size_sx(struct bpf_reg_state *reg, int size)
5500 {
5501 	s32 init_s32_max, init_s32_min, s32_max, s32_min, u32_val;
5502 	u32 top_smax_value, top_smin_value;
5503 	u32 num_bits = size * 8;
5504 
5505 	if (tnum_is_const(reg->var_off)) {
5506 		u32_val = reg->var_off.value;
5507 		if (size == 1)
5508 			reg->var_off = tnum_const((s8)u32_val);
5509 		else
5510 			reg->var_off = tnum_const((s16)u32_val);
5511 
5512 		u32_val = reg->var_off.value;
5513 		reg_set_srange32(reg, u32_val, u32_val);
5514 		return;
5515 	}
5516 
5517 	top_smax_value = ((u32)reg_s32_max(reg) >> num_bits) << num_bits;
5518 	top_smin_value = ((u32)reg_s32_min(reg) >> num_bits) << num_bits;
5519 
5520 	if (top_smax_value != top_smin_value)
5521 		goto out;
5522 
5523 	/* find the s32_min and s32_min after sign extension */
5524 	if (size == 1) {
5525 		init_s32_max = (s8)reg_s32_max(reg);
5526 		init_s32_min = (s8)reg_s32_min(reg);
5527 	} else {
5528 		/* size == 2 */
5529 		init_s32_max = (s16)reg_s32_max(reg);
5530 		init_s32_min = (s16)reg_s32_min(reg);
5531 	}
5532 	s32_max = max(init_s32_max, init_s32_min);
5533 	s32_min = min(init_s32_max, init_s32_min);
5534 
5535 	if ((s32_min >= 0) == (s32_max >= 0)) {
5536 		reg_set_srange32(reg, s32_min, s32_max);
5537 		reg->var_off = tnum_subreg(tnum_range(s32_min, s32_max));
5538 		return;
5539 	}
5540 
5541 out:
5542 	set_sext32_default_val(reg, size);
5543 }
5544 
5545 bool bpf_map_is_rdonly(const struct bpf_map *map)
5546 {
5547 	/* A map is considered read-only if the following condition are true:
5548 	 *
5549 	 * 1) BPF program side cannot change any of the map content. The
5550 	 *    BPF_F_RDONLY_PROG flag is throughout the lifetime of a map
5551 	 *    and was set at map creation time.
5552 	 * 2) The map value(s) have been initialized from user space by a
5553 	 *    loader and then "frozen", such that no new map update/delete
5554 	 *    operations from syscall side are possible for the rest of
5555 	 *    the map's lifetime from that point onwards.
5556 	 * 3) Any parallel/pending map update/delete operations from syscall
5557 	 *    side have been completed. Only after that point, it's safe to
5558 	 *    assume that map value(s) are immutable.
5559 	 */
5560 	return (map->map_flags & BPF_F_RDONLY_PROG) &&
5561 	       READ_ONCE(map->frozen) &&
5562 	       !bpf_map_write_active(map);
5563 }
5564 
5565 int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val,
5566 			bool is_ldsx)
5567 {
5568 	void *ptr;
5569 	u64 addr;
5570 	int err;
5571 
5572 	err = map->ops->map_direct_value_addr(map, &addr, off);
5573 	if (err)
5574 		return err;
5575 	ptr = (void *)(long)addr + off;
5576 
5577 	switch (size) {
5578 	case sizeof(u8):
5579 		*val = is_ldsx ? (s64)*(s8 *)ptr : (u64)*(u8 *)ptr;
5580 		break;
5581 	case sizeof(u16):
5582 		*val = is_ldsx ? (s64)*(s16 *)ptr : (u64)*(u16 *)ptr;
5583 		break;
5584 	case sizeof(u32):
5585 		*val = is_ldsx ? (s64)*(s32 *)ptr : (u64)*(u32 *)ptr;
5586 		break;
5587 	case sizeof(u64):
5588 		*val = *(u64 *)ptr;
5589 		break;
5590 	default:
5591 		return -EINVAL;
5592 	}
5593 	return 0;
5594 }
5595 
/* Build the name of a "safe" shadow type by pasting a suffix onto the given
 * type. The suffixes used here match the strings that the type_is_*()
 * helpers pass to btf_nested_type_is_trusted().
 */
#define BTF_TYPE_SAFE_RCU(__type)  __PASTE(__type, __safe_rcu)
#define BTF_TYPE_SAFE_RCU_OR_NULL(__type)  __PASTE(__type, __safe_rcu_or_null)
#define BTF_TYPE_SAFE_TRUSTED(__type)  __PASTE(__type, __safe_trusted)
#define BTF_TYPE_SAFE_TRUSTED_OR_NULL(__type)  __PASTE(__type, __safe_trusted_or_null)
5600 
/*
 * Allow-list of a few fields as RCU trusted or fully trusted.
 * This logic doesn't allow mixing tags and will be removed once GCC supports
 * btf_type_tag.
 *
 * Each declaration below lists, for one kernel struct, the pointer fields
 * that belong to the category named by the BTF_TYPE_SAFE_*() macro used.
 */

/* RCU trusted: these fields are trusted in RCU CS and never NULL */
BTF_TYPE_SAFE_RCU(struct task_struct) {
	const cpumask_t *cpus_ptr;
	struct css_set __rcu *cgroups;
	struct task_struct __rcu *real_parent;
	struct task_struct *group_leader;
};

BTF_TYPE_SAFE_RCU(struct cgroup) {
	/* cgrp->kn is always accessible as documented in kernel/cgroup/cgroup.c */
	struct kernfs_node *kn;
};

BTF_TYPE_SAFE_RCU(struct css_set) {
	struct cgroup *dfl_cgrp;
};

BTF_TYPE_SAFE_RCU(struct cgroup_subsys_state) {
	struct cgroup *cgroup;
};

/* RCU trusted: these fields are trusted in RCU CS and can be NULL */
BTF_TYPE_SAFE_RCU_OR_NULL(struct mm_struct) {
	struct file __rcu *exe_file;
#ifdef CONFIG_MEMCG
	struct task_struct __rcu *owner;
#endif
};

/* skb->sk, req->sk are not RCU protected, but we mark them as such
 * because bpf prog accessible sockets are SOCK_RCU_FREE.
 */
BTF_TYPE_SAFE_RCU_OR_NULL(struct sk_buff) {
	struct sock *sk;
};

BTF_TYPE_SAFE_RCU_OR_NULL(struct request_sock) {
	struct sock *sk;
};

/* full trusted: these fields are trusted even outside of RCU CS and never NULL */
BTF_TYPE_SAFE_TRUSTED(struct bpf_iter_meta) {
	struct seq_file *seq;
};

BTF_TYPE_SAFE_TRUSTED(struct bpf_iter__task) {
	struct bpf_iter_meta *meta;
	struct task_struct *task;
};

BTF_TYPE_SAFE_TRUSTED(struct linux_binprm) {
	struct file *file;
};

BTF_TYPE_SAFE_TRUSTED(struct file) {
	struct inode *f_inode;
};

/* NOTE(review): per the _OR_NULL suffix these are presumably the fully
 * trusted fields that may be NULL - confirm against the users of
 * type_is_trusted_or_null().
 */
BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct dentry) {
	struct inode *d_inode;
};

BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct socket) {
	struct sock *sk;
};

BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct vm_area_struct) {
	struct mm_struct *vm_mm;
	struct file *vm_file;
};
5677 
5678 static bool type_is_rcu(struct bpf_verifier_env *env,
5679 			struct bpf_reg_state *reg,
5680 			const char *field_name, u32 btf_id)
5681 {
5682 	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct task_struct));
5683 	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct cgroup));
5684 	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct css_set));
5685 	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct cgroup_subsys_state));
5686 
5687 	return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id, "__safe_rcu");
5688 }
5689 
5690 static bool type_is_rcu_or_null(struct bpf_verifier_env *env,
5691 				struct bpf_reg_state *reg,
5692 				const char *field_name, u32 btf_id)
5693 {
5694 	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU_OR_NULL(struct mm_struct));
5695 	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU_OR_NULL(struct sk_buff));
5696 	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU_OR_NULL(struct request_sock));
5697 
5698 	return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id, "__safe_rcu_or_null");
5699 }
5700 
5701 static bool type_is_trusted(struct bpf_verifier_env *env,
5702 			    struct bpf_reg_state *reg,
5703 			    const char *field_name, u32 btf_id)
5704 {
5705 	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct bpf_iter_meta));
5706 	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct bpf_iter__task));
5707 	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct linux_binprm));
5708 	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct file));
5709 
5710 	return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id, "__safe_trusted");
5711 }
5712 
5713 static bool type_is_trusted_or_null(struct bpf_verifier_env *env,
5714 				    struct bpf_reg_state *reg,
5715 				    const char *field_name, u32 btf_id)
5716 {
5717 	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct socket));
5718 	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct dentry));
5719 	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct vm_area_struct));
5720 
5721 	return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id,
5722 					  "__safe_trusted_or_null");
5723 }
5724 
/* Validate a @size byte access of kind @atype at displacement @off through
 * @reg, a PTR_TO_BTF_ID register, and for reads into @value_regno derive the
 * type and trust flags of the destination register.
 *
 * @regs is the register file that receives the load result, @argno is only
 * used to name the register in log messages.
 *
 * Returns 0 on success or a negative errno:
 *   -EPERM   env->allow_ptr_leaks is not set
 *   -EINVAL  kernel BTF accessed from a non-GPL compatible program
 *   -EACCES  variable or negative offset, MEM_USER / MEM_PERCPU pointer, or a
 *            write that is not permitted
 *   -EFAULT  internal inconsistency reported through verifier_bug()
 *   other    whatever the struct access callback or mark_btf_ld_reg() returns
 */
static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
				   struct bpf_reg_state *regs, struct bpf_reg_state *reg,
				   argno_t argno, int off, int size,
				   enum bpf_access_type atype,
				   int value_regno)
{
	const struct btf_type *t = btf_type_by_id(reg->btf, reg->btf_id);
	const char *tname = btf_name_by_offset(reg->btf, t->name_off);
	const char *field_name = NULL;
	enum bpf_type_flag flag = 0;
	u32 btf_id = 0;
	int ret;

	if (!env->allow_ptr_leaks) {
		verbose(env,
			"'struct %s' access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n",
			tname);
		return -EPERM;
	}
	if (!env->prog->gpl_compatible && btf_is_kernel(reg->btf)) {
		verbose(env,
			"Cannot access kernel 'struct %s' from non-GPL compatible program\n",
			tname);
		return -EINVAL;
	}

	/* Only a constant register offset can be resolved to a struct field. */
	if (!tnum_is_const(reg->var_off)) {
		char tn_buf[48];

		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
		verbose(env,
			"%s is ptr_%s invalid variable offset: off=%d, var_off=%s\n",
			reg_arg_name(env, argno), tname, off, tn_buf);
		return -EACCES;
	}

	/* From here on 'off' is the full offset: insn displacement plus the
	 * register's constant offset.
	 */
	off += reg->var_off.value;

	if (off < 0) {
		verbose(env,
			"%s is ptr_%s invalid negative access: off=%d\n",
			reg_arg_name(env, argno), tname, off);
		return -EACCES;
	}

	if (reg->type & MEM_USER) {
		verbose(env,
			"%s is ptr_%s access user memory: off=%d\n",
			reg_arg_name(env, argno), tname, off);
		return -EACCES;
	}

	if (reg->type & MEM_PERCPU) {
		verbose(env,
			"%s is ptr_%s access percpu memory: off=%d\n",
			reg_arg_name(env, argno), tname, off);
		return -EACCES;
	}

	/* Writes to non-allocated objects go through the program type's own
	 * btf_struct_access callback when it provides one; everything else uses
	 * the generic btf_struct_access().
	 */
	if (env->ops->btf_struct_access && !type_is_alloc(reg->type) && atype == BPF_WRITE) {
		if (!btf_is_kernel(reg->btf)) {
			verifier_bug(env, "reg->btf must be kernel btf");
			return -EFAULT;
		}
		ret = env->ops->btf_struct_access(&env->log, reg, off, size);
		/* The error itself is returned by the common check below. */
		if (ret < 0)
			verbose(env,
				"%s cannot write into ptr_%s at off=%d size=%d\n",
				reg_arg_name(env, argno), tname, off, size);
	} else {
		/* Writes are permitted with default btf_struct_access for
		 * program allocated objects (which always have id > 0),
		 * but not for untrusted PTR_TO_BTF_ID | MEM_ALLOC.
		 */
		if (atype != BPF_READ && !type_is_ptr_alloc_obj(reg->type)) {
			verbose(env, "only read is supported\n");
			return -EACCES;
		}

		if (type_is_alloc(reg->type) && !type_is_non_owning_ref(reg->type) &&
		    !(reg->type & MEM_RCU) && !reg_is_referenced(env, reg)) {
			verifier_bug(env, "allocated object must have a referenced id");
			return -EFAULT;
		}

		/* On success this also fills btf_id, flag and field_name. */
		ret = btf_struct_access(&env->log, reg, off, size, atype, &btf_id, &flag, &field_name);
	}

	if (ret < 0)
		return ret;

	/* 'ret' now holds the type of the accessed value; the trust flags below
	 * only matter when that value is itself a PTR_TO_BTF_ID.
	 */
	if (ret != PTR_TO_BTF_ID) {
		/* just mark; */

	} else if (type_flag(reg->type) & PTR_UNTRUSTED) {
		/* If this is an untrusted pointer, all pointers formed by walking it
		 * also inherit the untrusted flag.
		 */
		flag = PTR_UNTRUSTED;

	} else if (is_trusted_reg(env, reg) || is_rcu_reg(reg)) {
		/* By default any pointer obtained from walking a trusted pointer is no
		 * longer trusted, unless the field being accessed has explicitly been
		 * marked as inheriting its parent's state of trust (either full or RCU).
		 * For example:
		 * 'cgroups' pointer is untrusted if task->cgroups dereference
		 * happened in a sleepable program outside of bpf_rcu_read_lock()
		 * section. In a non-sleepable program it's trusted while in RCU CS (aka MEM_RCU).
		 * Note bpf_rcu_read_unlock() converts MEM_RCU pointers to PTR_UNTRUSTED.
		 *
		 * A regular RCU-protected pointer with __rcu tag can also be deemed
		 * trusted if we are in an RCU CS. Such pointer can be NULL.
		 */
		if (type_is_trusted(env, reg, field_name, btf_id)) {
			flag |= PTR_TRUSTED;
		} else if (type_is_trusted_or_null(env, reg, field_name, btf_id)) {
			flag |= PTR_TRUSTED | PTR_MAYBE_NULL;
		} else if (in_rcu_cs(env) && !type_may_be_null(reg->type)) {
			if (type_is_rcu(env, reg, field_name, btf_id)) {
				/* ignore __rcu tag and mark it MEM_RCU */
				flag |= MEM_RCU;
			} else if (flag & MEM_RCU ||
				   type_is_rcu_or_null(env, reg, field_name, btf_id)) {
				/* __rcu tagged pointers can be NULL */
				flag |= MEM_RCU | PTR_MAYBE_NULL;

				/* We always trust them */
				if (type_is_rcu_or_null(env, reg, field_name, btf_id) &&
				    flag & PTR_UNTRUSTED)
					flag &= ~PTR_UNTRUSTED;
			} else if (flag & (MEM_PERCPU | MEM_USER)) {
				/* keep as-is */
			} else {
				/* walking unknown pointers yields old deprecated PTR_TO_BTF_ID */
				clear_trusted_flags(&flag);
			}
		} else {
			/*
			 * If not in RCU CS or MEM_RCU pointer can be NULL then
			 * aggressively mark as untrusted otherwise such
			 * pointers will be plain PTR_TO_BTF_ID without flags
			 * and will be allowed to be passed into helpers for
			 * compat reasons.
			 */
			flag = PTR_UNTRUSTED;
		}
	} else {
		/* Old compat. Deprecated */
		clear_trusted_flags(&flag);
	}

	/* Only a load with a real destination register updates register state. */
	if (atype == BPF_READ && value_regno >= 0) {
		ret = mark_btf_ld_reg(env, regs, value_regno, ret, reg->btf, btf_id, flag);
		if (ret < 0)
			return ret;
	}

	return 0;
}
5884 
5885 static int check_ptr_to_map_access(struct bpf_verifier_env *env,
5886 				   struct bpf_reg_state *regs, struct bpf_reg_state *reg,
5887 				   argno_t argno, int off, int size,
5888 				   enum bpf_access_type atype,
5889 				   int value_regno)
5890 {
5891 	struct bpf_map *map = reg->map_ptr;
5892 	struct bpf_reg_state map_reg;
5893 	enum bpf_type_flag flag = 0;
5894 	const struct btf_type *t;
5895 	const char *tname;
5896 	u32 btf_id;
5897 	int ret;
5898 
5899 	if (!btf_vmlinux) {
5900 		verbose(env, "map_ptr access not supported without CONFIG_DEBUG_INFO_BTF\n");
5901 		return -ENOTSUPP;
5902 	}
5903 
5904 	if (!map->ops->map_btf_id || !*map->ops->map_btf_id) {
5905 		verbose(env, "map_ptr access not supported for map type %d\n",
5906 			map->map_type);
5907 		return -ENOTSUPP;
5908 	}
5909 
5910 	t = btf_type_by_id(btf_vmlinux, *map->ops->map_btf_id);
5911 	tname = btf_name_by_offset(btf_vmlinux, t->name_off);
5912 
5913 	if (!env->allow_ptr_leaks) {
5914 		verbose(env,
5915 			"'struct %s' access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n",
5916 			tname);
5917 		return -EPERM;
5918 	}
5919 
5920 	if (off < 0) {
5921 		verbose(env, "%s is %s invalid negative access: off=%d\n",
5922 			reg_arg_name(env, argno), tname, off);
5923 		return -EACCES;
5924 	}
5925 
5926 	if (atype != BPF_READ) {
5927 		verbose(env, "only read from %s is supported\n", tname);
5928 		return -EACCES;
5929 	}
5930 
5931 	/* Simulate access to a PTR_TO_BTF_ID */
5932 	memset(&map_reg, 0, sizeof(map_reg));
5933 	ret = mark_btf_ld_reg(env, &map_reg, 0, PTR_TO_BTF_ID,
5934 			      btf_vmlinux, *map->ops->map_btf_id, 0);
5935 	if (ret < 0)
5936 		return ret;
5937 	ret = btf_struct_access(&env->log, &map_reg, off, size, atype, &btf_id, &flag, NULL);
5938 	if (ret < 0)
5939 		return ret;
5940 
5941 	if (value_regno >= 0) {
5942 		ret = mark_btf_ld_reg(env, regs, value_regno, ret, btf_vmlinux, btf_id, flag);
5943 		if (ret < 0)
5944 			return ret;
5945 	}
5946 
5947 	return 0;
5948 }
5949 
5950 /* Check that the stack access at the given offset is within bounds. The
5951  * maximum valid offset is -1.
5952  *
5953  * The minimum valid offset is -MAX_BPF_STACK for writes, and
5954  * -state->allocated_stack for reads.
5955  */
5956 static int check_stack_slot_within_bounds(struct bpf_verifier_env *env,
5957                                           s64 off,
5958                                           struct bpf_func_state *state,
5959                                           enum bpf_access_type t)
5960 {
5961 	int min_valid_off;
5962 
5963 	if (t == BPF_WRITE || env->allow_uninit_stack)
5964 		min_valid_off = -MAX_BPF_STACK;
5965 	else
5966 		min_valid_off = -state->allocated_stack;
5967 
5968 	if (off < min_valid_off || off > -1)
5969 		return -EACCES;
5970 	return 0;
5971 }
5972 
5973 /* Check that the stack access at 'regno + off' falls within the maximum stack
5974  * bounds.
5975  *
5976  * 'off' includes `regno->offset`, but not its dynamic part (if any).
5977  */
5978 static int check_stack_access_within_bounds(
5979 		struct bpf_verifier_env *env, struct bpf_reg_state *reg,
5980 		argno_t argno, int off, int access_size,
5981 		enum bpf_access_type type)
5982 {
5983 	struct bpf_func_state *state = bpf_func(env, reg);
5984 	s64 min_off, max_off;
5985 	int err;
5986 	char *err_extra;
5987 
5988 	if (type == BPF_READ)
5989 		err_extra = " read from";
5990 	else
5991 		err_extra = " write to";
5992 
5993 	if (tnum_is_const(reg->var_off)) {
5994 		min_off = (s64)reg->var_off.value + off;
5995 		max_off = min_off + access_size;
5996 	} else {
5997 		if (reg_smax(reg) >= BPF_MAX_VAR_OFF ||
5998 		    reg_smin(reg) <= -BPF_MAX_VAR_OFF) {
5999 			verbose(env, "invalid unbounded variable-offset%s stack %s\n",
6000 				err_extra, reg_arg_name(env, argno));
6001 			return -EACCES;
6002 		}
6003 		min_off = reg_smin(reg) + off;
6004 		max_off = reg_smax(reg) + off + access_size;
6005 	}
6006 
6007 	err = check_stack_slot_within_bounds(env, min_off, state, type);
6008 	if (!err && max_off > 0)
6009 		err = -EINVAL; /* out of stack access into non-negative offsets */
6010 	if (!err && access_size < 0)
6011 		/* access_size should not be negative (or overflow an int); others checks
6012 		 * along the way should have prevented such an access.
6013 		 */
6014 		err = -EFAULT; /* invalid negative access size; integer overflow? */
6015 
6016 	if (err) {
6017 		if (tnum_is_const(reg->var_off)) {
6018 			verbose(env, "invalid%s stack %s off=%lld size=%d\n",
6019 				err_extra, reg_arg_name(env, argno), min_off, access_size);
6020 		} else {
6021 			char tn_buf[48];
6022 
6023 			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
6024 			verbose(env, "invalid variable-offset%s stack %s var_off=%s off=%d size=%d\n",
6025 				err_extra, reg_arg_name(env, argno), tn_buf, off, access_size);
6026 		}
6027 		return err;
6028 	}
6029 
6030 	/* Note that there is no stack access with offset zero, so the needed stack
6031 	 * size is -min_off, not -min_off+1.
6032 	 */
6033 	return grow_stack_state(env, state, -min_off /* size */);
6034 }
6035 
6036 static bool get_func_retval_range(struct bpf_prog *prog,
6037 				  struct bpf_retval_range *range)
6038 {
6039 	if (prog->type == BPF_PROG_TYPE_LSM &&
6040 		prog->expected_attach_type == BPF_LSM_MAC &&
6041 		!bpf_lsm_get_retval_range(prog, range)) {
6042 		return true;
6043 	}
6044 	return false;
6045 }
6046 
6047 static void add_scalar_to_reg(struct bpf_reg_state *dst_reg, s64 val)
6048 {
6049 	struct bpf_reg_state fake_reg;
6050 
6051 	if (!val)
6052 		return;
6053 
6054 	fake_reg.type = SCALAR_VALUE;
6055 	__mark_reg_known(&fake_reg, val);
6056 
6057 	scalar32_min_max_add(dst_reg, &fake_reg);
6058 	scalar_min_max_add(dst_reg, &fake_reg);
6059 	dst_reg->var_off = tnum_add(dst_reg->var_off, fake_reg.var_off);
6060 
6061 	reg_bounds_sync(dst_reg);
6062 }
6063 
/* check whether memory at (reg + off) is accessible for t = (read | write)
 * if t==write, value_regno is a register which value is stored into memory
 * if t==read, value_regno is a register which will receive the value from memory
 * if t==write && value_regno==-1, some unknown value is stored into memory
 * if t==read && value_regno==-1, don't care what we read from memory
 *
 * The access is dispatched on the type of 'reg'; each branch performs the
 * checks specific to that pointer type and, for reads, sets the state of
 * regs[value_regno]. 'argno' is only used to name the register in log
 * messages. 'bpf_size' is the BPF size encoding, converted to bytes below.
 *
 * Returns 0 on success or a negative errno.
 */
static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, struct bpf_reg_state *reg, argno_t argno,
			    int off, int bpf_size, enum bpf_access_type t,
			    int value_regno, bool strict_alignment_once, bool is_ldsx)
{
	struct bpf_reg_state *regs = cur_regs(env);
	int size, err = 0;

	size = bpf_size_to_bytes(bpf_size);
	if (size < 0)
		return size;

	err = check_ptr_alignment(env, reg, off, size, strict_alignment_once);
	if (err)
		return err;

	if (reg->type == PTR_TO_MAP_KEY) {
		/* Map keys are read-only and bounded by the map's key_size. */
		if (t == BPF_WRITE) {
			verbose(env, "write to change key %s not allowed\n",
				reg_arg_name(env, argno));
			return -EACCES;
		}

		err = check_mem_region_access(env, reg, argno, off, size,
					      reg->map_ptr->key_size, false);
		if (err)
			return err;
		if (value_regno >= 0)
			mark_reg_unknown(env, regs, value_regno);
	} else if (reg->type == PTR_TO_MAP_VALUE) {
		struct btf_field *kptr_field = NULL;

		if (t == BPF_WRITE && value_regno >= 0 &&
		    is_pointer_value(env, value_regno)) {
			verbose(env, "R%d leaks addr into map\n", value_regno);
			return -EACCES;
		}
		err = check_map_access_type(env, reg, off, size, t);
		if (err)
			return err;
		err = check_map_access(env, reg, argno, off, size, false, ACCESS_DIRECT);
		if (err)
			return err;
		/* A kptr/uptr field can only be identified at a constant offset. */
		if (tnum_is_const(reg->var_off))
			kptr_field = btf_record_find(reg->map_ptr->record,
						     off + reg->var_off.value, BPF_KPTR | BPF_UPTR);
		if (kptr_field) {
			err = check_map_kptr_access(env, value_regno, insn_idx, kptr_field);
		} else if (t == BPF_READ && value_regno >= 0) {
			struct bpf_map *map = reg->map_ptr;

			/*
			 * If map is read-only, track its contents as scalars,
			 * unless it is an insn array (see the special case below)
			 */
			if (tnum_is_const(reg->var_off) &&
			    bpf_map_is_rdonly(map) &&
			    map->ops->map_direct_value_addr &&
			    map->map_type != BPF_MAP_TYPE_INSN_ARRAY) {
				int map_off = off + reg->var_off.value;
				u64 val = 0;

				err = bpf_map_direct_read(map, map_off, size,
							  &val, is_ldsx);
				if (err)
					return err;

				regs[value_regno].type = SCALAR_VALUE;
				__mark_reg_known(&regs[value_regno], val);
			} else if (map->map_type == BPF_MAP_TYPE_INSN_ARRAY) {
				if (bpf_size != BPF_DW) {
					verbose(env, "Invalid read of %d bytes from insn_array\n",
						     size);
					return -EACCES;
				}
				/* The destination starts as a copy of the source
				 * register with 'off' added, retyped as PTR_TO_INSN.
				 */
				regs[value_regno] = *reg;
				add_scalar_to_reg(&regs[value_regno], off);
				regs[value_regno].type = PTR_TO_INSN;
			} else {
				mark_reg_unknown(env, regs, value_regno);
			}
		}
	} else if (base_type(reg->type) == PTR_TO_MEM) {
		bool rdonly_mem = type_is_rdonly_mem(reg->type);
		bool rdonly_untrusted = rdonly_mem && (reg->type & PTR_UNTRUSTED);

		if (type_may_be_null(reg->type)) {
			verbose(env, "%s invalid mem access '%s'\n", reg_arg_name(env, argno),
				reg_type_str(env, reg->type));
			return -EACCES;
		}

		if (t == BPF_WRITE && rdonly_mem) {
			verbose(env, "%s cannot write into %s\n",
				reg_arg_name(env, argno), reg_type_str(env, reg->type));
			return -EACCES;
		}

		if (t == BPF_WRITE && value_regno >= 0 &&
		    is_pointer_value(env, value_regno)) {
			verbose(env, "R%d leaks addr into mem\n", value_regno);
			return -EACCES;
		}

		/*
		 * Accesses to untrusted PTR_TO_MEM are done through probe
		 * instructions, hence no need to check bounds in that case.
		 */
		if (!rdonly_untrusted)
			err = check_mem_region_access(env, reg, argno, off, size,
						      reg->mem_size, false);
		if (!err && value_regno >= 0 && (t == BPF_READ || rdonly_mem))
			mark_reg_unknown(env, regs, value_regno);
	} else if (reg->type == PTR_TO_CTX) {
		/* check_ctx_access() reports the type of the loaded value in 'info'. */
		struct bpf_insn_access_aux info = {
			.reg_type = SCALAR_VALUE,
			.is_ldsx = is_ldsx,
			.log = &env->log,
		};
		struct bpf_retval_range range;

		if (t == BPF_WRITE && value_regno >= 0 &&
		    is_pointer_value(env, value_regno)) {
			verbose(env, "R%d leaks addr into ctx\n", value_regno);
			return -EACCES;
		}

		err = check_ctx_access(env, insn_idx, reg, argno, off, size, t, &info);
		if (!err && t == BPF_READ && value_regno >= 0) {
			/* ctx access returns either a scalar, or a
			 * PTR_TO_PACKET[_META,_END]. In the latter
			 * case, we know the offset is zero.
			 */
			if (info.reg_type == SCALAR_VALUE) {
				if (info.is_retval && get_func_retval_range(env->prog, &range)) {
					/* Narrow the unknown scalar to the return value range. */
					mark_reg_unknown(env, regs, value_regno);
					err = __mark_reg_s32_range(env, regs, value_regno,
								   range.minval, range.maxval);
					if (err)
						return err;
				} else {
					mark_reg_unknown(env, regs, value_regno);
				}
			} else {
				mark_reg_known_zero(env, regs,
						    value_regno);
				/* A load of ctx field could have different
				 * actual load size with the one encoded in the
				 * insn. When the dst is PTR, it is for sure not
				 * a sub-register.
				 */
				regs[value_regno].subreg_def = DEF_NOT_SUBREG;
				if (base_type(info.reg_type) == PTR_TO_BTF_ID) {
					regs[value_regno].btf = info.btf;
					regs[value_regno].btf_id = info.btf_id;
					regs[value_regno].id = info.ref_id;
				}
				/* A maybe-NULL pointer without an id gets a fresh one. */
				if (type_may_be_null(info.reg_type) && !regs[value_regno].id)
					regs[value_regno].id = ++env->id_gen;
			}
			regs[value_regno].type = info.reg_type;
		}

	} else if (reg->type == PTR_TO_STACK) {
		/* Basic bounds checks. */
		err = check_stack_access_within_bounds(env, reg, argno, off, size, t);
		if (err)
			return err;

		if (t == BPF_READ)
			err = check_stack_read(env, reg, argno, off, size,
					       value_regno);
		else
			err = check_stack_write(env, reg, off, size,
						value_regno, insn_idx);
	} else if (reg_is_pkt_pointer(reg)) {
		if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) {
			verbose(env, "cannot write into packet\n");
			return -EACCES;
		}
		if (t == BPF_WRITE && value_regno >= 0 &&
		    is_pointer_value(env, value_regno)) {
			verbose(env, "R%d leaks addr into packet\n",
				value_regno);
			return -EACCES;
		}
		err = check_packet_access(env, reg, argno, off, size, false);
		if (!err && t == BPF_READ && value_regno >= 0)
			mark_reg_unknown(env, regs, value_regno);
	} else if (reg->type == PTR_TO_FLOW_KEYS) {
		if (t == BPF_WRITE && value_regno >= 0 &&
		    is_pointer_value(env, value_regno)) {
			verbose(env, "R%d leaks addr into flow keys\n",
				value_regno);
			return -EACCES;
		}

		err = check_flow_keys_access(env, reg, argno, off, size);
		if (!err && t == BPF_READ && value_regno >= 0)
			mark_reg_unknown(env, regs, value_regno);
	} else if (type_is_sk_pointer(reg->type)) {
		/* Socket pointers are read-only here. */
		if (t == BPF_WRITE) {
			verbose(env, "%s cannot write into %s\n",
				reg_arg_name(env, argno), reg_type_str(env, reg->type));
			return -EACCES;
		}
		err = check_sock_access(env, insn_idx, reg, argno, off, size, t);
		if (!err && value_regno >= 0)
			mark_reg_unknown(env, regs, value_regno);
	} else if (reg->type == PTR_TO_TP_BUFFER) {
		err = check_tp_buffer_access(env, reg, argno, off, size);
		if (!err && t == BPF_READ && value_regno >= 0)
			mark_reg_unknown(env, regs, value_regno);
	} else if (base_type(reg->type) == PTR_TO_BTF_ID &&
		   !type_may_be_null(reg->type)) {
		err = check_ptr_to_btf_access(env, regs, reg, argno, off, size, t,
					      value_regno);
	} else if (reg->type == CONST_PTR_TO_MAP) {
		err = check_ptr_to_map_access(env, regs, reg, argno, off, size, t,
					      value_regno);
	} else if (base_type(reg->type) == PTR_TO_BUF &&
		   !type_may_be_null(reg->type)) {
		bool rdonly_mem = type_is_rdonly_mem(reg->type);
		u32 *max_access;

		/* Read-only and read-write buffers use separate per-program
		 * max access fields.
		 */
		if (rdonly_mem) {
			if (t == BPF_WRITE) {
				verbose(env, "%s cannot write into %s\n",
					reg_arg_name(env, argno), reg_type_str(env, reg->type));
				return -EACCES;
			}
			max_access = &env->prog->aux->max_rdonly_access;
		} else {
			max_access = &env->prog->aux->max_rdwr_access;
		}

		err = check_buffer_access(env, reg, argno, off, size, false,
					  max_access);

		if (!err && value_regno >= 0 && (rdonly_mem || t == BPF_READ))
			mark_reg_unknown(env, regs, value_regno);
	} else if (reg->type == PTR_TO_ARENA) {
		/* No bounds check in this branch; a load just yields an unknown scalar. */
		if (t == BPF_READ && value_regno >= 0)
			mark_reg_unknown(env, regs, value_regno);
	} else {
		verbose(env, "%s invalid mem access '%s'\n", reg_arg_name(env, argno),
			reg_type_str(env, reg->type));
		return -EACCES;
	}

	/* Narrow loads that produced a scalar are zero- or sign-extended. */
	if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ &&
	    regs[value_regno].type == SCALAR_VALUE) {
		if (!is_ldsx)
			/* b/h/w load zero-extends, mark upper bits as known 0 */
			coerce_reg_to_size(&regs[value_regno], size);
		else
			coerce_reg_to_size_sx(&regs[value_regno], size);
	}
	return err;
}
6329 
/* Forward declaration: used by the load/store/atomic checks below before its
 * definition appears.
 */
static int save_aux_ptr_type(struct bpf_verifier_env *env, enum bpf_reg_type type,
			     bool allow_trust_mismatch);
6332 
6333 static int check_load_mem(struct bpf_verifier_env *env, struct bpf_insn *insn,
6334 			  bool strict_alignment_once, bool is_ldsx,
6335 			  bool allow_trust_mismatch, const char *ctx)
6336 {
6337 	struct bpf_verifier_state *vstate = env->cur_state;
6338 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
6339 	struct bpf_reg_state *regs = cur_regs(env);
6340 	enum bpf_reg_type src_reg_type;
6341 	int err;
6342 
6343 	/* Handle stack arg read */
6344 	if (is_stack_arg_ldx(insn)) {
6345 		err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
6346 		if (err)
6347 			return err;
6348 		return check_stack_arg_read(env, state, insn->off, insn->dst_reg);
6349 	}
6350 
6351 	/* check src operand */
6352 	err = check_reg_arg(env, insn->src_reg, SRC_OP);
6353 	if (err)
6354 		return err;
6355 
6356 	/* check dst operand */
6357 	err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
6358 	if (err)
6359 		return err;
6360 
6361 	src_reg_type = regs[insn->src_reg].type;
6362 
6363 	/* Check if (src_reg + off) is readable. The state of dst_reg will be
6364 	 * updated by this call.
6365 	 */
6366 	err = check_mem_access(env, env->insn_idx, regs + insn->src_reg, argno_from_reg(insn->src_reg), insn->off,
6367 			       BPF_SIZE(insn->code), BPF_READ, insn->dst_reg,
6368 			       strict_alignment_once, is_ldsx);
6369 	err = err ?: save_aux_ptr_type(env, src_reg_type,
6370 				       allow_trust_mismatch);
6371 	err = err ?: reg_bounds_sanity_check(env, &regs[insn->dst_reg], ctx);
6372 
6373 	return err;
6374 }
6375 
6376 static int check_store_reg(struct bpf_verifier_env *env, struct bpf_insn *insn,
6377 			   bool strict_alignment_once)
6378 {
6379 	struct bpf_verifier_state *vstate = env->cur_state;
6380 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
6381 	struct bpf_reg_state *regs = cur_regs(env);
6382 	enum bpf_reg_type dst_reg_type;
6383 	int err;
6384 
6385 	/* Handle stack arg write */
6386 	if (is_stack_arg_stx(insn)) {
6387 		err = check_reg_arg(env, insn->src_reg, SRC_OP);
6388 		if (err)
6389 			return err;
6390 		return check_stack_arg_write(env, state, insn->off, regs + insn->src_reg);
6391 	}
6392 
6393 	/* check src1 operand */
6394 	err = check_reg_arg(env, insn->src_reg, SRC_OP);
6395 	if (err)
6396 		return err;
6397 
6398 	/* check src2 operand */
6399 	err = check_reg_arg(env, insn->dst_reg, SRC_OP);
6400 	if (err)
6401 		return err;
6402 
6403 	dst_reg_type = regs[insn->dst_reg].type;
6404 
6405 	/* Check if (dst_reg + off) is writeable. */
6406 	err = check_mem_access(env, env->insn_idx, regs + insn->dst_reg, argno_from_reg(insn->dst_reg), insn->off,
6407 			       BPF_SIZE(insn->code), BPF_WRITE, insn->src_reg,
6408 			       strict_alignment_once, false);
6409 	err = err ?: save_aux_ptr_type(env, dst_reg_type, false);
6410 
6411 	return err;
6412 }
6413 
6414 static int check_atomic_rmw(struct bpf_verifier_env *env,
6415 			    struct bpf_insn *insn)
6416 {
6417 	struct bpf_reg_state *dst_reg;
6418 	int load_reg;
6419 	int err;
6420 
6421 	if (BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) {
6422 		verbose(env, "invalid atomic operand size\n");
6423 		return -EINVAL;
6424 	}
6425 
6426 	/* check src1 operand */
6427 	err = check_reg_arg(env, insn->src_reg, SRC_OP);
6428 	if (err)
6429 		return err;
6430 
6431 	/* check src2 operand */
6432 	err = check_reg_arg(env, insn->dst_reg, SRC_OP);
6433 	if (err)
6434 		return err;
6435 
6436 	if (insn->imm == BPF_CMPXCHG) {
6437 		/* Check comparison of R0 with memory location */
6438 		const u32 aux_reg = BPF_REG_0;
6439 
6440 		err = check_reg_arg(env, aux_reg, SRC_OP);
6441 		if (err)
6442 			return err;
6443 
6444 		if (is_pointer_value(env, aux_reg)) {
6445 			verbose(env, "R%d leaks addr into mem\n", aux_reg);
6446 			return -EACCES;
6447 		}
6448 	}
6449 
6450 	if (is_pointer_value(env, insn->src_reg)) {
6451 		verbose(env, "R%d leaks addr into mem\n", insn->src_reg);
6452 		return -EACCES;
6453 	}
6454 
6455 	if (!atomic_ptr_type_ok(env, insn->dst_reg, insn)) {
6456 		verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n",
6457 			insn->dst_reg,
6458 			reg_type_str(env, reg_state(env, insn->dst_reg)->type));
6459 		return -EACCES;
6460 	}
6461 
6462 	if (insn->imm & BPF_FETCH) {
6463 		if (insn->imm == BPF_CMPXCHG)
6464 			load_reg = BPF_REG_0;
6465 		else
6466 			load_reg = insn->src_reg;
6467 
6468 		/* check and record load of old value */
6469 		err = check_reg_arg(env, load_reg, DST_OP);
6470 		if (err)
6471 			return err;
6472 	} else {
6473 		/* This instruction accesses a memory location but doesn't
6474 		 * actually load it into a register.
6475 		 */
6476 		load_reg = -1;
6477 	}
6478 
6479 	dst_reg = cur_regs(env) + insn->dst_reg;
6480 
6481 	/* Check whether we can read the memory, with second call for fetch
6482 	 * case to simulate the register fill.
6483 	 */
6484 	err = check_mem_access(env, env->insn_idx, dst_reg, argno_from_reg(insn->dst_reg), insn->off,
6485 			       BPF_SIZE(insn->code), BPF_READ, -1, true, false);
6486 	if (!err && load_reg >= 0)
6487 		err = check_mem_access(env, env->insn_idx, dst_reg, argno_from_reg(insn->dst_reg),
6488 				       insn->off, BPF_SIZE(insn->code),
6489 				       BPF_READ, load_reg, true, false);
6490 	if (err)
6491 		return err;
6492 
6493 	if (is_arena_reg(env, insn->dst_reg)) {
6494 		err = save_aux_ptr_type(env, PTR_TO_ARENA, false);
6495 		if (err)
6496 			return err;
6497 	}
6498 	/* Check whether we can write into the same memory. */
6499 	err = check_mem_access(env, env->insn_idx, dst_reg, argno_from_reg(insn->dst_reg), insn->off,
6500 			       BPF_SIZE(insn->code), BPF_WRITE, -1, true, false);
6501 	if (err)
6502 		return err;
6503 	return 0;
6504 }
6505 
6506 static int check_atomic_load(struct bpf_verifier_env *env,
6507 			     struct bpf_insn *insn)
6508 {
6509 	int err;
6510 
6511 	err = check_load_mem(env, insn, true, false, false, "atomic_load");
6512 	if (err)
6513 		return err;
6514 
6515 	if (!atomic_ptr_type_ok(env, insn->src_reg, insn)) {
6516 		verbose(env, "BPF_ATOMIC loads from R%d %s is not allowed\n",
6517 			insn->src_reg,
6518 			reg_type_str(env, reg_state(env, insn->src_reg)->type));
6519 		return -EACCES;
6520 	}
6521 
6522 	return 0;
6523 }
6524 
6525 static int check_atomic_store(struct bpf_verifier_env *env,
6526 			      struct bpf_insn *insn)
6527 {
6528 	int err;
6529 
6530 	err = check_store_reg(env, insn, true);
6531 	if (err)
6532 		return err;
6533 
6534 	if (!atomic_ptr_type_ok(env, insn->dst_reg, insn)) {
6535 		verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n",
6536 			insn->dst_reg,
6537 			reg_type_str(env, reg_state(env, insn->dst_reg)->type));
6538 		return -EACCES;
6539 	}
6540 
6541 	return 0;
6542 }
6543 
6544 static int check_atomic(struct bpf_verifier_env *env, struct bpf_insn *insn)
6545 {
6546 	switch (insn->imm) {
6547 	case BPF_ADD:
6548 	case BPF_ADD | BPF_FETCH:
6549 	case BPF_AND:
6550 	case BPF_AND | BPF_FETCH:
6551 	case BPF_OR:
6552 	case BPF_OR | BPF_FETCH:
6553 	case BPF_XOR:
6554 	case BPF_XOR | BPF_FETCH:
6555 	case BPF_XCHG:
6556 	case BPF_CMPXCHG:
6557 		return check_atomic_rmw(env, insn);
6558 	case BPF_LOAD_ACQ:
6559 		if (BPF_SIZE(insn->code) == BPF_DW && BITS_PER_LONG != 64) {
6560 			verbose(env,
6561 				"64-bit load-acquires are only supported on 64-bit arches\n");
6562 			return -EOPNOTSUPP;
6563 		}
6564 		return check_atomic_load(env, insn);
6565 	case BPF_STORE_REL:
6566 		if (BPF_SIZE(insn->code) == BPF_DW && BITS_PER_LONG != 64) {
6567 			verbose(env,
6568 				"64-bit store-releases are only supported on 64-bit arches\n");
6569 			return -EOPNOTSUPP;
6570 		}
6571 		return check_atomic_store(env, insn);
6572 	default:
6573 		verbose(env, "BPF_ATOMIC uses invalid atomic opcode %02x\n",
6574 			insn->imm);
6575 		return -EINVAL;
6576 	}
6577 }
6578 
/* When register @reg is used to read the stack (either directly or through
 * a helper function) make sure that it's within stack boundary and, depending
 * on the access type and privileges, that all elements of the stack are
 * initialized.
 *
 * All registers that have been spilled on the stack in the slots within the
 * read offsets are marked as read.
 * NOTE(review): in this function the 'mark' label is an empty statement, so
 * no read mark is set here; confirm whether the sentence above is still
 * accurate.
 *
 * @reg: stack pointer register; its var_off plus @off is the start of the range
 * @argno: identifies the argument in diagnostics and is converted to
 *	meta->regno in raw mode
 * @off: constant offset added to the register's own offset
 * @access_size: number of bytes accessed; only the magnitude is used as the
 *	size, a negative sign additionally makes STACK_POISON slots acceptable
 * @zero_size_allowed: whether a zero-byte access is accepted
 * @type: BPF_WRITE marks the access as clobbering the slots it touches
 * @meta: may be NULL; when meta->raw_mode is set the range does not have to
 *	be initialized and only an overlap with dynptr slots is rejected
 *
 * Returns 0 on success, the error from check_stack_access_within_bounds(),
 * -EACCES for a rejected access, or -EFAULT when a byte of the range lies
 * outside the allocated stack.
 */
static int check_stack_range_initialized(
		struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno, int off,
		int access_size, bool zero_size_allowed,
		enum bpf_access_type type, struct bpf_call_arg_meta *meta)
{
	struct bpf_func_state *state = bpf_func(env, reg);
	int err, min_off, max_off, i, j, slot, spi;
	/* Some accesses can write anything into the stack, others are
	 * read-only.
	 */
	bool clobber = type == BPF_WRITE;
	/*
	 * Negative access_size signals global subprog/kfunc arg check where
	 * STACK_POISON slots are acceptable. static stack liveness
	 * might have determined that subprog doesn't read them,
	 * but BTF based global subprog validation isn't accurate enough.
	 */
	bool allow_poison = access_size < 0 || clobber;

	/* From here on only the magnitude of the size matters. */
	access_size = abs(access_size);

	if (access_size == 0 && !zero_size_allowed) {
		verbose(env, "invalid zero-sized read\n");
		return -EACCES;
	}

	err = check_stack_access_within_bounds(env, reg, argno, off, access_size, type);
	if (err)
		return err;


	/* Compute the lowest and highest possible start offsets of the access. */
	if (tnum_is_const(reg->var_off)) {
		min_off = max_off = reg->var_off.value + off;
	} else {
		/* Variable offset is prohibited for unprivileged mode for
		 * simplicity since it requires corresponding support in
		 * Spectre masking for stack ALU.
		 * See also retrieve_ptr_limit().
		 */
		if (!env->bypass_spec_v1) {
			char tn_buf[48];

			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
			verbose(env, "%s variable offset stack access prohibited for !root, var_off=%s\n",
				reg_arg_name(env, argno), tn_buf);
			return -EACCES;
		}
		/* Only initialized buffer on stack is allowed to be accessed
		 * with variable offset. With uninitialized buffer it's hard to
		 * guarantee that whole memory is marked as initialized on
		 * helper return since specific bounds are unknown what may
		 * cause uninitialized stack leaking.
		 */
		if (meta && meta->raw_mode)
			meta = NULL;

		min_off = reg_smin(reg) + off;
		max_off = reg_smax(reg) + off;
	}

	if (meta && meta->raw_mode) {
		/* Ensure we won't be overwriting dynptrs when simulating byte
		 * by byte access in check_helper_call using meta.access_size.
		 * This would be a problem if we have a helper in the future
		 * which takes:
		 *
		 *	helper(uninit_mem, len, dynptr)
		 *
		 * Now, uninit_mem may overlap with dynptr pointer. Hence, it
		 * may end up writing to dynptr itself when touching memory from
		 * arg 1. This can be relaxed on a case by case basis for known
		 * safe cases, but reject due to the possibility of aliasing by
		 * default.
		 */
		for (i = min_off; i < max_off + access_size; i++) {
			int stack_off = -i - 1;

			spi = bpf_get_spi(i);
			/* raw_mode may write past allocated_stack */
			if (state->allocated_stack <= stack_off)
				continue;
			if (state->stack[spi].slot_type[stack_off % BPF_REG_SIZE] == STACK_DYNPTR) {
				verbose(env, "potential write to dynptr at off=%d disallowed\n", i);
				return -EACCES;
			}
		}
		/* Record the access in meta and skip the initialization check. */
		meta->access_size = access_size;
		meta->regno = reg_from_argno(argno);
		return 0;
	}

	/* Check every byte the access may touch, one slot_type entry at a time. */
	for (i = min_off; i < max_off + access_size; i++) {
		u8 *stype;

		/* Convert the (negative) stack offset into a byte index and slot index. */
		slot = -i - 1;
		spi = slot / BPF_REG_SIZE;
		if (state->allocated_stack <= slot) {
			verbose(env, "allocated_stack too small\n");
			return -EFAULT;
		}

		stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
		if (*stype == STACK_MISC)
			goto mark;
		if ((*stype == STACK_ZERO) ||
		    (*stype == STACK_INVALID && env->allow_uninit_stack)) {
			if (clobber) {
				/* helper can write anything into the stack */
				*stype = STACK_MISC;
			}
			goto mark;
		}

		/* A spilled register is acceptable if it is a scalar, or if
		 * env->allow_ptr_leaks is set; a clobbering access turns it
		 * into an unknown value and scrubs the whole slot.
		 */
		if (bpf_is_spilled_reg(&state->stack[spi]) &&
		    (state->stack[spi].spilled_ptr.type == SCALAR_VALUE ||
		     env->allow_ptr_leaks)) {
			if (clobber) {
				__mark_reg_unknown(env, &state->stack[spi].spilled_ptr);
				for (j = 0; j < BPF_REG_SIZE; j++)
					scrub_spilled_slot(&state->stack[spi].slot_type[j]);
			}
			goto mark;
		}

		/* Nothing above accepted this byte: report why and reject. */
		if (*stype == STACK_POISON) {
			if (allow_poison)
				goto mark;
			verbose(env, "reading from stack %s off %d+%d size %d, slot poisoned by dead code elimination\n",
				reg_arg_name(env, argno), min_off, i - min_off, access_size);
		} else if (tnum_is_const(reg->var_off)) {
			verbose(env, "invalid read from stack %s off %d+%d size %d\n",
				reg_arg_name(env, argno), min_off, i - min_off, access_size);
		} else {
			char tn_buf[48];

			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
			verbose(env, "invalid read from stack %s var_off %s+%d size %d\n",
				reg_arg_name(env, argno), tn_buf, i - min_off, access_size);
		}
		return -EACCES;
mark:
		;
	}
	return 0;
}
6732 
6733 static int check_helper_mem_access(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno,
6734 				   int access_size, enum bpf_access_type access_type,
6735 				   bool zero_size_allowed,
6736 				   struct bpf_call_arg_meta *meta)
6737 {
6738 	struct bpf_reg_state *regs = cur_regs(env);
6739 	u32 *max_access;
6740 
6741 	switch (base_type(reg->type)) {
6742 	case PTR_TO_PACKET:
6743 	case PTR_TO_PACKET_META:
6744 		return check_packet_access(env, reg, argno, 0, access_size,
6745 					   zero_size_allowed);
6746 	case PTR_TO_MAP_KEY:
6747 		if (access_type == BPF_WRITE) {
6748 			verbose(env, "%s cannot write into %s\n",
6749 				reg_arg_name(env, argno), reg_type_str(env, reg->type));
6750 			return -EACCES;
6751 		}
6752 		return check_mem_region_access(env, reg, argno, 0, access_size,
6753 					       reg->map_ptr->key_size, false);
6754 	case PTR_TO_MAP_VALUE:
6755 		if (check_map_access_type(env, reg, 0, access_size, access_type))
6756 			return -EACCES;
6757 		return check_map_access(env, reg, argno, 0, access_size,
6758 					zero_size_allowed, ACCESS_HELPER);
6759 	case PTR_TO_MEM:
6760 		if (type_is_rdonly_mem(reg->type)) {
6761 			if (access_type == BPF_WRITE) {
6762 				verbose(env, "%s cannot write into %s\n",
6763 					reg_arg_name(env, argno), reg_type_str(env, reg->type));
6764 				return -EACCES;
6765 			}
6766 		}
6767 		return check_mem_region_access(env, reg, argno, 0,
6768 					       access_size, reg->mem_size,
6769 					       zero_size_allowed);
6770 	case PTR_TO_BUF:
6771 		if (type_is_rdonly_mem(reg->type)) {
6772 			if (access_type == BPF_WRITE) {
6773 				verbose(env, "%s cannot write into %s\n",
6774 					reg_arg_name(env, argno), reg_type_str(env, reg->type));
6775 				return -EACCES;
6776 			}
6777 
6778 			max_access = &env->prog->aux->max_rdonly_access;
6779 		} else {
6780 			max_access = &env->prog->aux->max_rdwr_access;
6781 		}
6782 		return check_buffer_access(env, reg, argno, 0,
6783 					   access_size, zero_size_allowed,
6784 					   max_access);
6785 	case PTR_TO_STACK:
6786 		return check_stack_range_initialized(
6787 				env, reg,
6788 				argno, 0, access_size,
6789 				zero_size_allowed, access_type, meta);
6790 	case PTR_TO_BTF_ID:
6791 		return check_ptr_to_btf_access(env, regs, reg, argno, 0,
6792 					       access_size, access_type, -1);
6793 	case PTR_TO_CTX:
6794 		/* Only permit reading or writing syscall context using helper calls. */
6795 		if (is_var_ctx_off_allowed(env->prog)) {
6796 			int err = check_mem_region_access(env, reg, argno, 0, access_size, U16_MAX,
6797 							  zero_size_allowed);
6798 			if (err)
6799 				return err;
6800 			if (env->prog->aux->max_ctx_offset < reg_umax(reg) + access_size)
6801 				env->prog->aux->max_ctx_offset = reg_umax(reg) + access_size;
6802 			return 0;
6803 		}
6804 		fallthrough;
6805 	default: /* scalar_value or invalid ptr */
6806 		/* Allow zero-byte read from NULL, regardless of pointer type */
6807 		if (zero_size_allowed && access_size == 0 &&
6808 		    bpf_register_is_null(reg))
6809 			return 0;
6810 
6811 		verbose(env, "%s type=%s ", reg_arg_name(env, argno),
6812 			reg_type_str(env, reg->type));
6813 		verbose(env, "expected=%s\n", reg_type_str(env, PTR_TO_STACK));
6814 		return -EACCES;
6815 	}
6816 }
6817 
6818 /* verify arguments to helpers or kfuncs consisting of a pointer and an access
6819  * size.
6820  *
6821  * @mem_reg contains the pointer, @size_reg contains the access size.
6822  */
6823 static int check_mem_size_reg(struct bpf_verifier_env *env,
6824 			      struct bpf_reg_state *mem_reg,
6825 			      struct bpf_reg_state *size_reg, argno_t mem_argno,
6826 			      argno_t size_argno, enum bpf_access_type access_type,
6827 			      bool zero_size_allowed,
6828 			      struct bpf_call_arg_meta *meta)
6829 {
6830 	int err;
6831 
6832 	/* This is used to refine r0 return value bounds for helpers
6833 	 * that enforce this value as an upper bound on return values.
6834 	 * See do_refine_retval_range() for helpers that can refine
6835 	 * the return value. C type of helper is u32 so we pull register
6836 	 * bound from umax_value however, if negative verifier errors
6837 	 * out. Only upper bounds can be learned because retval is an
6838 	 * int type and negative retvals are allowed.
6839 	 */
6840 	meta->msize_max_value = reg_umax(size_reg);
6841 
6842 	/* The register is SCALAR_VALUE; the access check happens using
6843 	 * its boundaries. For unprivileged variable accesses, disable
6844 	 * raw mode so that the program is required to initialize all
6845 	 * the memory that the helper could just partially fill up.
6846 	 */
6847 	if (!tnum_is_const(size_reg->var_off))
6848 		meta = NULL;
6849 
6850 	if (reg_smin(size_reg) < 0) {
6851 		verbose(env, "%s min value is negative, either use unsigned or 'var &= const'\n",
6852 			reg_arg_name(env, size_argno));
6853 		return -EACCES;
6854 	}
6855 
6856 	if (reg_umin(size_reg) == 0 && !zero_size_allowed) {
6857 		verbose(env, "%s invalid zero-sized read: u64=[%lld,%lld]\n",
6858 			reg_arg_name(env, size_argno), reg_umin(size_reg), reg_umax(size_reg));
6859 		return -EACCES;
6860 	}
6861 
6862 	if (reg_umax(size_reg) >= BPF_MAX_VAR_SIZ) {
6863 		verbose(env, "%s unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
6864 			reg_arg_name(env, size_argno));
6865 		return -EACCES;
6866 	}
6867 	err = check_helper_mem_access(env, mem_reg, mem_argno, reg_umax(size_reg),
6868 				      access_type, zero_size_allowed, meta);
6869 	if (!err) {
6870 		int regno = reg_from_argno(size_argno);
6871 
6872 		if (regno >= 0)
6873 			err = mark_chain_precision(env, regno);
6874 		else
6875 			err = mark_stack_arg_precision(env, arg_idx_from_argno(size_argno));
6876 	}
6877 	return err;
6878 }
6879 
6880 static int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
6881 			 argno_t argno, u32 mem_size)
6882 {
6883 	bool may_be_null = type_may_be_null(reg->type);
6884 	struct bpf_reg_state saved_reg;
6885 	int err;
6886 
6887 	if (bpf_register_is_null(reg))
6888 		return 0;
6889 
6890 	if (mem_size > S32_MAX) {
6891 		verbose(env, "%s memory size %u is too large\n",
6892 			reg_arg_name(env, argno), mem_size);
6893 		return -EACCES;
6894 	}
6895 
6896 	/* Assuming that the register contains a value check if the memory
6897 	 * access is safe. Temporarily save and restore the register's state as
6898 	 * the conversion shouldn't be visible to a caller.
6899 	 */
6900 	if (may_be_null) {
6901 		saved_reg = *reg;
6902 		mark_ptr_not_null_reg(reg);
6903 	}
6904 
6905 	int size = base_type(reg->type) == PTR_TO_STACK ? -(int)mem_size : mem_size;
6906 
6907 	err = check_helper_mem_access(env, reg, argno, size, BPF_READ, true, NULL);
6908 	err = err ?: check_helper_mem_access(env, reg, argno, size, BPF_WRITE, true, NULL);
6909 
6910 	if (may_be_null)
6911 		*reg = saved_reg;
6912 
6913 	return err;
6914 }
6915 
6916 static int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *mem_reg,
6917 				    struct bpf_reg_state *size_reg, argno_t mem_argno, argno_t size_argno)
6918 {
6919 	bool may_be_null = type_may_be_null(mem_reg->type);
6920 	struct bpf_reg_state saved_reg;
6921 	struct bpf_call_arg_meta meta;
6922 	int err;
6923 
6924 	memset(&meta, 0, sizeof(meta));
6925 
6926 	if (may_be_null) {
6927 		saved_reg = *mem_reg;
6928 		mark_ptr_not_null_reg(mem_reg);
6929 	}
6930 
6931 	err = check_mem_size_reg(env, mem_reg, size_reg, mem_argno, size_argno, BPF_READ, true, &meta);
6932 	err = err ?: check_mem_size_reg(env, mem_reg, size_reg, mem_argno, size_argno, BPF_WRITE, true, &meta);
6933 
6934 	if (may_be_null)
6935 		*mem_reg = saved_reg;
6936 
6937 	return err;
6938 }
6939 
/* Flag bits for the @flags argument of process_spin_lock(). */
enum {
	/* set: the call takes the lock; clear: the call releases it */
	PROCESS_SPIN_LOCK = (1 << 0),
	/* the lock is a bpf_res_spin_lock rather than a bpf_spin_lock */
	PROCESS_RES_LOCK  = (1 << 1),
	/* combined with PROCESS_RES_LOCK, selects REF_TYPE_RES_LOCK_IRQ */
	PROCESS_LOCK_IRQ  = (1 << 2),
};
6945 
/* Implementation details:
 * bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL.
 * bpf_obj_new returns PTR_TO_BTF_ID | MEM_ALLOC | PTR_MAYBE_NULL.
 * Two bpf_map_lookups (even with the same key) will have different reg->id.
 * Two separate bpf_obj_new will also have different reg->id.
 * For traditional PTR_TO_MAP_VALUE or PTR_TO_BTF_ID | MEM_ALLOC, the verifier
 * clears reg->id after value_or_null->value transition, since the verifier only
 * cares about the range of access to valid map value pointer and doesn't care
 * about actual address of the map element.
 * For maps with 'struct bpf_spin_lock' inside map value the verifier keeps
 * reg->id > 0 after value_or_null->value transition. By doing so
 * two bpf_map_lookups will be considered two different pointers that
 * point to different bpf_spin_locks. Likewise for pointers to allocated objects
 * returned from bpf_obj_new.
 * The verifier allows taking only one bpf_spin_lock at a time to avoid
 * dead-locks.
 * Since only one bpf_spin_lock is allowed the checks are simpler than
 * reg_is_refcounted() logic. The verifier needs to remember only
 * one spin_lock instead of array of acquired_refs.
 * env->cur_state->active_locks remembers which map value element or allocated
 * object got locked and clears it after bpf_spin_unlock.
 *
 * @reg: pointer to the lock; must have a constant offset that equals the
 *	offset of the lock field in the object's btf_record
 * @argno: identifies the argument in diagnostics
 * @flags: mask of PROCESS_SPIN_LOCK (lock rather than unlock),
 *	PROCESS_RES_LOCK (bpf_res_spin_lock rather than bpf_spin_lock) and
 *	PROCESS_LOCK_IRQ (IRQ variant of the res lock)
 *
 * Returns 0 on success, -EINVAL when a check fails, or the negative error
 * from acquire_lock_state().
 */
static int process_spin_lock(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno, int flags)
{
	bool is_lock = flags & PROCESS_SPIN_LOCK, is_res_lock = flags & PROCESS_RES_LOCK;
	const char *lock_str = is_res_lock ? "bpf_res_spin" : "bpf_spin";
	struct bpf_verifier_state *cur = env->cur_state;
	bool is_const = tnum_is_const(reg->var_off);
	bool is_irq = flags & PROCESS_LOCK_IRQ;
	u64 val = reg->var_off.value;
	struct bpf_map *map = NULL;
	struct btf *btf = NULL;
	struct btf_record *rec;
	u32 spin_lock_off;
	int err;

	if (!is_const) {
		verbose(env,
			"%s doesn't have constant offset. %s_lock has to be at the constant offset\n",
			reg_arg_name(env, argno), lock_str);
		return -EINVAL;
	}
	/* A map value is identified by its map, anything else by reg->btf;
	 * whichever is set is later used as the lock's identity pointer.
	 */
	if (reg->type == PTR_TO_MAP_VALUE) {
		map = reg->map_ptr;
		if (!map->btf) {
			verbose(env,
				"map '%s' has to have BTF in order to use %s_lock\n",
				map->name, lock_str);
			return -EINVAL;
		}
	} else {
		btf = reg->btf;
	}

	/* The object must contain a lock of the requested kind ... */
	rec = reg_btf_record(reg);
	if (!btf_record_has_field(rec, is_res_lock ? BPF_RES_SPIN_LOCK : BPF_SPIN_LOCK)) {
		verbose(env, "%s '%s' has no valid %s_lock\n", map ? "map" : "local",
			map ? map->name : "kptr", lock_str);
		return -EINVAL;
	}
	/* ... and the register must point exactly at it. */
	spin_lock_off = is_res_lock ? rec->res_spin_lock_off : rec->spin_lock_off;
	if (spin_lock_off != val) {
		verbose(env, "off %lld doesn't point to 'struct %s_lock' that is at %d\n",
			val, lock_str, spin_lock_off);
		return -EINVAL;
	}
	if (is_lock) {
		void *ptr;
		int type;

		if (map)
			ptr = map;
		else
			ptr = btf;

		/* With locks already held: a bpf_spin_lock is refused if any
		 * REF_TYPE_LOCK is held, a res lock is refused only if the
		 * same (reg->id, ptr) res lock is already held.
		 */
		if (!is_res_lock && cur->active_locks) {
			if (find_lock_state(env->cur_state, REF_TYPE_LOCK, 0, NULL)) {
				verbose(env,
					"Locking two bpf_spin_locks are not allowed\n");
				return -EINVAL;
			}
		} else if (is_res_lock && cur->active_locks) {
			if (find_lock_state(env->cur_state, REF_TYPE_RES_LOCK | REF_TYPE_RES_LOCK_IRQ, reg->id, ptr)) {
				verbose(env, "Acquiring the same lock again, AA deadlock detected\n");
				return -EINVAL;
			}
		}

		/* Pick the reference type recorded for this acquisition. */
		if (is_res_lock && is_irq)
			type = REF_TYPE_RES_LOCK_IRQ;
		else if (is_res_lock)
			type = REF_TYPE_RES_LOCK;
		else
			type = REF_TYPE_LOCK;
		err = acquire_lock_state(env, env->insn_idx, type, reg->id, ptr);
		if (err < 0) {
			verbose(env, "Failed to acquire lock state\n");
			return err;
		}
	} else {
		void *ptr;
		int type;

		if (map)
			ptr = map;
		else
			ptr = btf;

		if (!cur->active_locks) {
			verbose(env, "%s_unlock without taking a lock\n", lock_str);
			return -EINVAL;
		}

		if (is_res_lock && is_irq)
			type = REF_TYPE_RES_LOCK_IRQ;
		else if (is_res_lock)
			type = REF_TYPE_RES_LOCK;
		else
			type = REF_TYPE_LOCK;
		/* The lock being released must be one that is held ... */
		if (!find_lock_state(cur, type, reg->id, ptr)) {
			verbose(env, "%s_unlock of different lock\n", lock_str);
			return -EINVAL;
		}
		/* ... and it must be the one recorded as the active lock. */
		if (reg->id != cur->active_lock_id || ptr != cur->active_lock_ptr) {
			verbose(env, "%s_unlock cannot be out of order\n", lock_str);
			return -EINVAL;
		}
		if (release_lock_state(cur, type, reg->id, ptr)) {
			verbose(env, "%s_unlock of different lock\n", lock_str);
			return -EINVAL;
		}

		invalidate_non_owning_refs(env);
	}
	return 0;
}
7082 
7083 /* Check if @regno is a pointer to a specific field in a map value */
7084 static int check_map_field_pointer(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno,
7085 				   enum btf_field_type field_type,
7086 				   struct bpf_map_desc *map_desc)
7087 {
7088 	bool is_const = tnum_is_const(reg->var_off);
7089 	struct bpf_map *map = reg->map_ptr;
7090 	u64 val = reg->var_off.value;
7091 	const char *struct_name = btf_field_type_name(field_type);
7092 	int field_off = -1;
7093 
7094 	if (!is_const) {
7095 		verbose(env,
7096 			"%s doesn't have constant offset. %s has to be at the constant offset\n",
7097 			reg_arg_name(env, argno), struct_name);
7098 		return -EINVAL;
7099 	}
7100 	if (!map->btf) {
7101 		verbose(env, "map '%s' has to have BTF in order to use %s\n", map->name,
7102 			struct_name);
7103 		return -EINVAL;
7104 	}
7105 	if (!btf_record_has_field(map->record, field_type)) {
7106 		verbose(env, "map '%s' has no valid %s\n", map->name, struct_name);
7107 		return -EINVAL;
7108 	}
7109 	switch (field_type) {
7110 	case BPF_TIMER:
7111 		field_off = map->record->timer_off;
7112 		break;
7113 	case BPF_TASK_WORK:
7114 		field_off = map->record->task_work_off;
7115 		break;
7116 	case BPF_WORKQUEUE:
7117 		field_off = map->record->wq_off;
7118 		break;
7119 	default:
7120 		verifier_bug(env, "unsupported BTF field type: %s\n", struct_name);
7121 		return -EINVAL;
7122 	}
7123 	if (field_off != val) {
7124 		verbose(env, "off %lld doesn't point to 'struct %s' that is at %d\n",
7125 			val, struct_name, field_off);
7126 		return -EINVAL;
7127 	}
7128 	if (map_desc->ptr) {
7129 		verifier_bug(env, "Two map pointers in a %s helper", struct_name);
7130 		return -EFAULT;
7131 	}
7132 	map_desc->uid = reg->map_uid;
7133 	map_desc->ptr = map;
7134 	return 0;
7135 }
7136 
7137 static int process_timer_func(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno,
7138 			      struct bpf_map_desc *map)
7139 {
7140 	if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
7141 		verbose(env, "bpf_timer cannot be used for PREEMPT_RT.\n");
7142 		return -EOPNOTSUPP;
7143 	}
7144 	return check_map_field_pointer(env, reg, argno, BPF_TIMER, map);
7145 }
7146 
7147 static int process_timer_helper(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno,
7148 				struct bpf_call_arg_meta *meta)
7149 {
7150 	return process_timer_func(env, reg, argno, &meta->map);
7151 }
7152 
7153 static int process_timer_kfunc(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno,
7154 			       struct bpf_kfunc_call_arg_meta *meta)
7155 {
7156 	return process_timer_func(env, reg, argno, &meta->map);
7157 }
7158 
7159 static int process_kptr_func(struct bpf_verifier_env *env, int regno,
7160 			     struct bpf_call_arg_meta *meta)
7161 {
7162 	struct bpf_reg_state *reg = reg_state(env, regno);
7163 	struct btf_field *kptr_field;
7164 	struct bpf_map *map_ptr;
7165 	struct btf_record *rec;
7166 	u32 kptr_off;
7167 
7168 	if (type_is_ptr_alloc_obj(reg->type)) {
7169 		rec = reg_btf_record(reg);
7170 	} else { /* PTR_TO_MAP_VALUE */
7171 		map_ptr = reg->map_ptr;
7172 		if (!map_ptr->btf) {
7173 			verbose(env, "map '%s' has to have BTF in order to use bpf_kptr_xchg\n",
7174 				map_ptr->name);
7175 			return -EINVAL;
7176 		}
7177 		rec = map_ptr->record;
7178 		meta->map.ptr = map_ptr;
7179 	}
7180 
7181 	if (!tnum_is_const(reg->var_off)) {
7182 		verbose(env,
7183 			"R%d doesn't have constant offset. kptr has to be at the constant offset\n",
7184 			regno);
7185 		return -EINVAL;
7186 	}
7187 
7188 	if (!btf_record_has_field(rec, BPF_KPTR)) {
7189 		verbose(env, "R%d has no valid kptr\n", regno);
7190 		return -EINVAL;
7191 	}
7192 
7193 	kptr_off = reg->var_off.value;
7194 	kptr_field = btf_record_find(rec, kptr_off, BPF_KPTR);
7195 	if (!kptr_field) {
7196 		verbose(env, "off=%d doesn't point to kptr\n", kptr_off);
7197 		return -EACCES;
7198 	}
7199 	if (kptr_field->type != BPF_KPTR_REF && kptr_field->type != BPF_KPTR_PERCPU) {
7200 		verbose(env, "off=%d kptr isn't referenced kptr\n", kptr_off);
7201 		return -EACCES;
7202 	}
7203 	meta->kptr_field = kptr_field;
7204 	return 0;
7205 }
7206 
/*
 * Validate dynptr arguments for helper, kfunc and subprog.
 *
 * @dynptr is both input and output. It is populated when the argument is
 * tagged with MEM_UNINIT (i.e., the dynptr argument that will be constructed)
 * and consumed when the argument is expecting to be an initialized dynptr.
 * @parent_id is used to track the referenced parent object (e.g., file or skb in
 * qdisc program) when constructing a dynptr.
 * NOTE(review): there is no @parent_id parameter in the signature below;
 * @ref_obj is what gets forwarded to mark_stack_slots_dynptr() when a dynptr
 * is constructed, so the sentence above presumably describes @ref_obj.
 * Confirm and update.
 *
 * @reg is the argument register, @argno identifies it in diagnostics,
 * @insn_idx is the instruction being verified and @arg_type carries the
 * MEM_UNINIT / OBJ_RELEASE modifiers and the expected dynptr type. @dynptr
 * may be NULL when an initialized dynptr is expected.
 *
 * There are two register types representing a bpf_dynptr, one is PTR_TO_STACK
 * which points to a stack slot, and the other is CONST_PTR_TO_DYNPTR.
 *
 * In both cases we deal with the first 8 bytes, but need to mark the next 8
 * bytes as STACK_DYNPTR in case of PTR_TO_STACK. In case of
 * CONST_PTR_TO_DYNPTR, we are guaranteed to get the beginning of the object.
 *
 * Mutability of bpf_dynptr is at two levels: the dynptr and the memory the
 * dynptr points to. At the first level, the verifier will make sure a
 * CONST_PTR_TO_DYNPTR cannot be reinitialized or destroyed. The mutability of
 * a dynptr's view (i.e., start and offset) is not tracked as there is not such
 * use case. The second level is tracked using the upper bit of bpf_dynptr->size
 * and checked dynamically during runtime.
 *
 * Returns 0 on success, -EINVAL when the argument does not match what
 * @arg_type expects, or the error reported by a callee.
 */
static int process_dynptr_func(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
			       argno_t argno, int insn_idx, enum bpf_arg_type arg_type,
			       struct ref_obj_desc *ref_obj, struct bpf_dynptr_desc *dynptr)
{
	int spi, err = 0;

	if (reg->type != PTR_TO_STACK && reg->type != CONST_PTR_TO_DYNPTR) {
		verbose(env,
			"%s expected pointer to stack or const struct bpf_dynptr\n",
			reg_arg_name(env, argno));
		return -EINVAL;
	}

	/*  MEM_UNINIT - Points to memory that is an appropriate candidate for
	 *		 constructing a mutable bpf_dynptr object.
	 *
	 *		 Currently, this is only possible with PTR_TO_STACK
	 *		 pointing to a region of at least 16 bytes which doesn't
	 *		 contain an existing bpf_dynptr.
	 *
	 *  OBJ_RELEASE - Points to a initialized bpf_dynptr that will be
	 *		  destroyed.
	 *
	 *  None       - Points to a initialized dynptr that cannot be
	 *		 reinitialized or destroyed. However, the view of the
	 *		 dynptr and the memory it points to may be mutated.
	 */
	if (arg_type & MEM_UNINIT) {
		int i;

		if (!is_dynptr_reg_valid_uninit(env, reg)) {
			verbose(env, "Dynptr has to be an uninitialized dynptr\n");
			return -EINVAL;
		}

		/* we write BPF_DW bits (8 bytes) at a time */
		for (i = 0; i < BPF_DYNPTR_SIZE; i += 8) {
			err = check_mem_access(env, insn_idx, reg, argno,
					       i, BPF_DW, BPF_WRITE, -1, false, false);
			if (err)
				return err;
		}

		/* The memory is writable: turn the slots into a dynptr. */
		err = mark_stack_slots_dynptr(env, reg, arg_type, insn_idx, ref_obj, dynptr);
	} else /* OBJ_RELEASE and None case from above */ {
		/* For the reg->type == PTR_TO_STACK case, bpf_dynptr is never const */
		if (reg->type == CONST_PTR_TO_DYNPTR && (arg_type & OBJ_RELEASE)) {
			verbose(env, "CONST_PTR_TO_DYNPTR cannot be released\n");
			return -EINVAL;
		}

		if (!is_dynptr_reg_valid_init(env, reg)) {
			verbose(env, "Expected an initialized dynptr as %s\n",
				reg_arg_name(env, argno));
			return -EINVAL;
		}

		/* Fold modifiers (in this case, OBJ_RELEASE) when checking expected type */
		if (!is_dynptr_type_expected(env, reg, arg_type & ~OBJ_RELEASE)) {
			verbose(env,
				"Expected a dynptr of type %s as %s\n",
				dynptr_type_str(arg_to_dynptr_type(arg_type)),
				reg_arg_name(env, argno));
			return -EINVAL;
		}

		if (reg->type != CONST_PTR_TO_DYNPTR) {
			struct bpf_func_state *state = bpf_func(env, reg);

			spi = dynptr_get_spi(env, reg);
			if (spi < 0)
				return spi;

			/*
			 * For CONST_PTR_TO_DYNPTR, reg is already scratched by check_reg_arg
			 * in check_helper_call and mark_btf_func_reg_size in check_kfunc_call.
			 */
			mark_stack_slots_scratched(env, spi, BPF_DYNPTR_NR_SLOTS);

			/* From here on, reg refers to the dynptr's stack slot state. */
			reg = &state->stack[spi].spilled_ptr;
		}

		/* Report the dynptr's identity back to the caller if requested. */
		if (dynptr) {
			dynptr->type = reg->dynptr.type;
			dynptr->id = reg->id;
			dynptr->parent_id = reg->parent_id;
		}
	}
	return err;
}
7320 
7321 static bool is_iter_kfunc(struct bpf_kfunc_call_arg_meta *meta)
7322 {
7323 	return meta->kfunc_flags & (KF_ITER_NEW | KF_ITER_NEXT | KF_ITER_DESTROY);
7324 }
7325 
7326 static bool is_iter_new_kfunc(struct bpf_kfunc_call_arg_meta *meta)
7327 {
7328 	return meta->kfunc_flags & KF_ITER_NEW;
7329 }
7330 
7331 
7332 static bool is_iter_destroy_kfunc(struct bpf_kfunc_call_arg_meta *meta)
7333 {
7334 	return meta->kfunc_flags & KF_ITER_DESTROY;
7335 }
7336 
7337 static bool is_kfunc_arg_iter(struct bpf_kfunc_call_arg_meta *meta, int arg_idx,
7338 			      const struct btf_param *arg)
7339 {
7340 	/* btf_check_iter_kfuncs() guarantees that first argument of any iter
7341 	 * kfunc is iter state pointer
7342 	 */
7343 	if (is_iter_kfunc(meta))
7344 		return arg_idx == 0;
7345 
7346 	/* iter passed as an argument to a generic kfunc */
7347 	return btf_param_match_suffix(meta->btf, arg, "__iter");
7348 }
7349 
/* Validate an iterator state argument of a kfunc.
 *
 * @reg must be a PTR_TO_STACK pointing at the iterator state, @argno
 * identifies the argument in diagnostics and in the BTF lookup, @insn_idx is
 * the instruction being verified and @meta describes the kfunc being called.
 *
 * For a KF_ITER_NEW kfunc the stack slots must be a valid uninitialized
 * iterator; they are checked for writability and then marked as iterator
 * slots. For every other kfunc the slots must hold an initialized iterator;
 * its location is recorded in meta->iter and meta->ref_obj, and a
 * KF_ITER_DESTROY kfunc additionally unmarks the slots.
 *
 * Returns 0 on success, -EINVAL or -EPROTO for a rejected argument, or the
 * error reported by a callee.
 */
static int process_iter_arg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno, int insn_idx,
			    struct bpf_kfunc_call_arg_meta *meta)
{
	struct bpf_func_state *state = bpf_func(env, reg);
	const struct btf_type *t;
	u32 arg_idx = arg_idx_from_argno(argno);
	int spi, err, i, nr_slots, btf_id;

	if (reg->type != PTR_TO_STACK) {
		verbose(env, "%s expected pointer to an iterator on stack\n",
			reg_arg_name(env, argno));
		return -EINVAL;
	}

	/* For iter_{new,next,destroy} functions, btf_check_iter_kfuncs()
	 * ensures struct convention, so we wouldn't need to do any BTF
	 * validation here. But given iter state can be passed as a parameter
	 * to any kfunc, if arg has "__iter" suffix, we need to be a bit more
	 * conservative here.
	 */
	btf_id = btf_check_iter_arg(meta->btf, meta->func_proto, arg_idx);
	if (btf_id < 0) {
		verbose(env, "expected valid iter pointer as %s\n",
			reg_arg_name(env, argno));
		return -EINVAL;
	}
	/* The iterator occupies as many stack slots as its BTF size covers. */
	t = btf_type_by_id(meta->btf, btf_id);
	nr_slots = t->size / BPF_REG_SIZE;

	if (is_iter_new_kfunc(meta)) {
		/* bpf_iter_<type>_new() expects pointer to uninit iter state */
		if (!is_iter_reg_valid_uninit(env, reg, nr_slots)) {
			verbose(env, "expected uninitialized iter_%s as %s\n",
				iter_type_str(meta->btf, btf_id), reg_arg_name(env, argno));
			return -EINVAL;
		}

		/* Check that every slot of the iterator state can be written. */
		for (i = 0; i < nr_slots * 8; i += BPF_REG_SIZE) {
			err = check_mem_access(env, insn_idx, reg, argno,
					       i, BPF_DW, BPF_WRITE, -1, false, false);
			if (err)
				return err;
		}

		err = mark_stack_slots_iter(env, meta, reg, insn_idx, meta->btf, btf_id, nr_slots);
		if (err)
			return err;
	} else {
		/* iter_next() or iter_destroy(), as well as any kfunc
		 * accepting iter argument, expect initialized iter state
		 */
		err = is_iter_reg_valid_init(env, reg, meta->btf, btf_id, nr_slots);
		switch (err) {
		case 0:
			break;
		case -EINVAL:
			verbose(env, "expected an initialized iter_%s as %s\n",
				iter_type_str(meta->btf, btf_id), reg_arg_name(env, argno));
			return err;
		case -EPROTO:
			verbose(env, "expected an RCU CS when using %s\n", meta->func_name);
			return err;
		default:
			return err;
		}

		spi = iter_get_spi(env, reg, nr_slots);
		if (spi < 0)
			return spi;

		mark_stack_slots_scratched(env, spi, nr_slots);

		/* remember meta->iter info for process_iter_next_call() */
		meta->iter.spi = spi;
		meta->iter.frameno = reg->frameno;
		update_ref_obj(&meta->ref_obj, &state->stack[spi].spilled_ptr);

		/* A destroy kfunc ends the iterator's life on the stack. */
		if (is_iter_destroy_kfunc(meta)) {
			err = unmark_stack_slots_iter(env, reg, nr_slots);
			if (err)
				return err;
		}
	}

	return 0;
}
7436 
7437 /* Look for a previous loop entry at insn_idx: nearest parent state
7438  * stopped at insn_idx with callsites matching those in cur->frame.
7439  */
7440 static struct bpf_verifier_state *find_prev_entry(struct bpf_verifier_env *env,
7441 						  struct bpf_verifier_state *cur,
7442 						  int insn_idx)
7443 {
7444 	struct bpf_verifier_state_list *sl;
7445 	struct bpf_verifier_state *st;
7446 	struct list_head *pos, *head;
7447 
7448 	/* Explored states are pushed in stack order, most recent states come first */
7449 	head = bpf_explored_state(env, insn_idx);
7450 	list_for_each(pos, head) {
7451 		sl = container_of(pos, struct bpf_verifier_state_list, node);
7452 		/* If st->branches != 0 state is a part of current DFS verification path,
7453 		 * hence cur & st for a loop.
7454 		 */
7455 		st = &sl->state;
7456 		if (st->insn_idx == insn_idx && st->branches && same_callsites(st, cur) &&
7457 		    st->dfs_depth < cur->dfs_depth)
7458 			return st;
7459 	}
7460 
7461 	return NULL;
7462 }
7463 
7464 /*
7465  * Check if scalar registers are exact for the purpose of not widening.
7466  * More lenient than regs_exact()
7467  */
7468 static bool scalars_exact_for_widen(const struct bpf_reg_state *rold,
7469 				    const struct bpf_reg_state *rcur)
7470 {
7471 	return !memcmp(rold, rcur, offsetof(struct bpf_reg_state, id));
7472 }
7473 
7474 static void maybe_widen_reg(struct bpf_verifier_env *env,
7475 			    struct bpf_reg_state *rold, struct bpf_reg_state *rcur)
7476 {
7477 	if (rold->type != SCALAR_VALUE)
7478 		return;
7479 	if (rold->type != rcur->type)
7480 		return;
7481 	if (rold->precise || rcur->precise || scalars_exact_for_widen(rold, rcur))
7482 		return;
7483 	__mark_reg_unknown(env, rcur);
7484 }
7485 
7486 static int widen_imprecise_scalars(struct bpf_verifier_env *env,
7487 				   struct bpf_verifier_state *old,
7488 				   struct bpf_verifier_state *cur)
7489 {
7490 	struct bpf_func_state *fold, *fcur;
7491 	int i, fr, num_slots;
7492 
7493 	for (fr = old->curframe; fr >= 0; fr--) {
7494 		fold = old->frame[fr];
7495 		fcur = cur->frame[fr];
7496 
7497 		for (i = 0; i < MAX_BPF_REG; i++)
7498 			maybe_widen_reg(env,
7499 					&fold->regs[i],
7500 					&fcur->regs[i]);
7501 
7502 		num_slots = min(fold->allocated_stack / BPF_REG_SIZE,
7503 				fcur->allocated_stack / BPF_REG_SIZE);
7504 		for (i = 0; i < num_slots; i++) {
7505 			if (!bpf_is_spilled_reg(&fold->stack[i]) ||
7506 			    !bpf_is_spilled_reg(&fcur->stack[i]))
7507 				continue;
7508 
7509 			maybe_widen_reg(env,
7510 					&fold->stack[i].spilled_ptr,
7511 					&fcur->stack[i].spilled_ptr);
7512 		}
7513 	}
7514 	return 0;
7515 }
7516 
7517 static struct bpf_reg_state *get_iter_from_state(struct bpf_verifier_state *cur_st,
7518 						 struct bpf_kfunc_call_arg_meta *meta)
7519 {
7520 	int iter_frameno = meta->iter.frameno;
7521 	int iter_spi = meta->iter.spi;
7522 
7523 	return &cur_st->frame[iter_frameno]->stack[iter_spi].spilled_ptr;
7524 }
7525 
7526 /* process_iter_next_call() is called when verifier gets to iterator's next
7527  * "method" (e.g., bpf_iter_num_next() for numbers iterator) call. We'll refer
7528  * to it as just "iter_next()" in comments below.
7529  *
7530  * BPF verifier relies on a crucial contract for any iter_next()
7531  * implementation: it should *eventually* return NULL, and once that happens
7532  * it should keep returning NULL. That is, once iterator exhausts elements to
7533  * iterate, it should never reset or spuriously return new elements.
7534  *
7535  * With the assumption of such contract, process_iter_next_call() simulates
7536  * a fork in the verifier state to validate loop logic correctness and safety
7537  * without having to simulate infinite amount of iterations.
7538  *
7539  * In current state, we first assume that iter_next() returned NULL and
7540  * iterator state is set to DRAINED (BPF_ITER_STATE_DRAINED). In such
7541  * conditions we should not form an infinite loop and should eventually reach
7542  * exit.
7543  *
7544  * Besides that, we also fork current state and enqueue it for later
7545  * verification. In a forked state we keep iterator state as ACTIVE
7546  * (BPF_ITER_STATE_ACTIVE) and assume non-NULL return from iter_next(). We
7547  * also bump iteration depth to prevent erroneous infinite loop detection
7548  * later on (see iter_active_depths_differ() comment for details). In this
7549  * state we assume that we'll eventually loop back to another iter_next()
7550  * call (it could be in exactly same location or in some other instruction,
7551  * it doesn't matter, we don't make any unnecessary assumptions about this,
7552  * everything revolves around iterator state in a stack slot, not which
7553  * instruction is calling iter_next()). When that happens, we either will come
7554  * to iter_next() with equivalent state and can conclude that next iteration
7555  * will proceed in exactly the same way as we just verified, so it's safe to
7556  * assume that loop converges. If not, we'll go on another iteration
7557  * simulation with a different input state, until all possible starting states
7558  * are validated or we reach maximum number of instructions limit.
7559  *
7560  * This way, we will either exhaustively discover all possible input states
7561  * that iterator loop can start with and eventually will converge, or we'll
7562  * effectively regress into bounded loop simulation logic and either reach
7563  * maximum number of instructions if loop is not provably convergent, or there
7564  * is some statically known limit on number of iterations (e.g., if there is
7565  * an explicit `if n > 100 then break;` statement somewhere in the loop).
7566  *
7567  * Iteration convergence logic in is_state_visited() relies on exact
7568  * states comparison, which ignores read and precision marks.
7569  * This is necessary because read and precision marks are not finalized
7570  * while in the loop. Exact comparison might preclude convergence for
7571  * simple programs like below:
7572  *
7573  *     i = 0;
7574  *     while(iter_next(&it))
7575  *       i++;
7576  *
7577  * At each iteration step i++ would produce a new distinct state and
7578  * eventually instruction processing limit would be reached.
7579  *
7580  * To avoid such behavior speculatively forget (widen) range for
7581  * imprecise scalar registers, if those registers were not precise at the
7582  * end of the previous iteration and do not match exactly.
7583  *
7584  * This is a conservative heuristic that allows to verify wide range of programs,
7585  * however it precludes verification of programs that conjure an
7586  * imprecise value on the first loop iteration and use it as precise on a second.
7587  * For example, the following safe program would fail to verify:
7588  *
7589  *     struct bpf_num_iter it;
7590  *     int arr[10];
7591  *     int i = 0, a = 0;
7592  *     bpf_iter_num_new(&it, 0, 10);
7593  *     while (bpf_iter_num_next(&it)) {
7594  *       if (a == 0) {
7595  *         a = 1;
7596  *         i = 7; // Because i changed, the verifier would forget
7597  *                // its range on second loop entry.
7598  *       } else {
7599  *         arr[i] = 42; // This would fail to verify.
7600  *       }
7601  *     }
7602  *     bpf_iter_num_destroy(&it);
7603  */
7604 static int process_iter_next_call(struct bpf_verifier_env *env, int insn_idx,
7605 				  struct bpf_kfunc_call_arg_meta *meta)
7606 {
7607 	struct bpf_verifier_state *cur_st = env->cur_state, *queued_st, *prev_st;
7608 	struct bpf_func_state *cur_fr = cur_st->frame[cur_st->curframe], *queued_fr;
7609 	struct bpf_reg_state *cur_iter, *queued_iter;
7610 
7611 	BTF_TYPE_EMIT(struct bpf_iter);
7612 
7613 	cur_iter = get_iter_from_state(cur_st, meta);
7614 
7615 	if (cur_iter->iter.state != BPF_ITER_STATE_ACTIVE &&
7616 	    cur_iter->iter.state != BPF_ITER_STATE_DRAINED) {
7617 		verifier_bug(env, "unexpected iterator state %d (%s)",
7618 			     cur_iter->iter.state, iter_state_str(cur_iter->iter.state));
7619 		return -EFAULT;
7620 	}
7621 
7622 	if (cur_iter->iter.state == BPF_ITER_STATE_ACTIVE) {
7623 		/* Because iter_next() call is a checkpoint is_state_visitied()
7624 		 * should guarantee parent state with same call sites and insn_idx.
7625 		 */
7626 		if (!cur_st->parent || cur_st->parent->insn_idx != insn_idx ||
7627 		    !same_callsites(cur_st->parent, cur_st)) {
7628 			verifier_bug(env, "bad parent state for iter next call");
7629 			return -EFAULT;
7630 		}
7631 		/* Note cur_st->parent in the call below, it is necessary to skip
7632 		 * checkpoint created for cur_st by is_state_visited()
7633 		 * right at this instruction.
7634 		 */
7635 		prev_st = find_prev_entry(env, cur_st->parent, insn_idx);
7636 		/* branch out active iter state */
7637 		queued_st = push_stack(env, insn_idx + 1, insn_idx, false);
7638 		if (IS_ERR(queued_st))
7639 			return PTR_ERR(queued_st);
7640 
7641 		queued_iter = get_iter_from_state(queued_st, meta);
7642 		queued_iter->iter.state = BPF_ITER_STATE_ACTIVE;
7643 		queued_iter->iter.depth++;
7644 		if (prev_st)
7645 			widen_imprecise_scalars(env, prev_st, queued_st);
7646 
7647 		queued_fr = queued_st->frame[queued_st->curframe];
7648 		mark_ptr_not_null_reg(&queued_fr->regs[BPF_REG_0]);
7649 	}
7650 
7651 	/* switch to DRAINED state, but keep the depth unchanged */
7652 	/* mark current iter state as drained and assume returned NULL */
7653 	cur_iter->iter.state = BPF_ITER_STATE_DRAINED;
7654 	__mark_reg_const_zero(env, &cur_fr->regs[BPF_REG_0]);
7655 
7656 	return 0;
7657 }
7658 
7659 static bool arg_type_is_mem_size(enum bpf_arg_type type)
7660 {
7661 	return type == ARG_CONST_SIZE ||
7662 	       type == ARG_CONST_SIZE_OR_ZERO;
7663 }
7664 
7665 static bool arg_type_is_raw_mem(enum bpf_arg_type type)
7666 {
7667 	return base_type(type) == ARG_PTR_TO_MEM &&
7668 	       type & MEM_UNINIT;
7669 }
7670 
7671 static bool arg_type_is_release(enum bpf_arg_type type)
7672 {
7673 	return type & OBJ_RELEASE;
7674 }
7675 
7676 static bool arg_type_is_dynptr(enum bpf_arg_type type)
7677 {
7678 	return base_type(type) == ARG_PTR_TO_DYNPTR;
7679 }
7680 
7681 static int resolve_map_arg_type(struct bpf_verifier_env *env,
7682 				 const struct bpf_call_arg_meta *meta,
7683 				 enum bpf_arg_type *arg_type)
7684 {
7685 	if (!meta->map.ptr) {
7686 		/* kernel subsystem misconfigured verifier */
7687 		verifier_bug(env, "invalid map_ptr to access map->type");
7688 		return -EFAULT;
7689 	}
7690 
7691 	switch (meta->map.ptr->map_type) {
7692 	case BPF_MAP_TYPE_SOCKMAP:
7693 	case BPF_MAP_TYPE_SOCKHASH:
7694 		if (*arg_type == ARG_PTR_TO_MAP_VALUE) {
7695 			*arg_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON;
7696 		} else {
7697 			verbose(env, "invalid arg_type for sockmap/sockhash\n");
7698 			return -EINVAL;
7699 		}
7700 		break;
7701 	case BPF_MAP_TYPE_BLOOM_FILTER:
7702 		if (meta->func_id == BPF_FUNC_map_peek_elem)
7703 			*arg_type = ARG_PTR_TO_MAP_VALUE;
7704 		break;
7705 	default:
7706 		break;
7707 	}
7708 	return 0;
7709 }
7710 
7711 struct bpf_reg_types {
7712 	const enum bpf_reg_type types[10];
7713 	u32 *btf_id;
7714 };
7715 
7716 static const struct bpf_reg_types sock_types = {
7717 	.types = {
7718 		PTR_TO_SOCK_COMMON,
7719 		PTR_TO_SOCKET,
7720 		PTR_TO_TCP_SOCK,
7721 		PTR_TO_XDP_SOCK,
7722 	},
7723 };
7724 
7725 #ifdef CONFIG_NET
7726 static const struct bpf_reg_types btf_id_sock_common_types = {
7727 	.types = {
7728 		PTR_TO_SOCK_COMMON,
7729 		PTR_TO_SOCKET,
7730 		PTR_TO_TCP_SOCK,
7731 		PTR_TO_XDP_SOCK,
7732 		PTR_TO_BTF_ID,
7733 		PTR_TO_BTF_ID | PTR_TRUSTED,
7734 	},
7735 	.btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
7736 };
7737 #endif
7738 
7739 static const struct bpf_reg_types mem_types = {
7740 	.types = {
7741 		PTR_TO_STACK,
7742 		PTR_TO_PACKET,
7743 		PTR_TO_PACKET_META,
7744 		PTR_TO_MAP_KEY,
7745 		PTR_TO_MAP_VALUE,
7746 		PTR_TO_MEM,
7747 		PTR_TO_MEM | MEM_RINGBUF,
7748 		PTR_TO_BUF,
7749 		PTR_TO_BTF_ID | PTR_TRUSTED,
7750 		PTR_TO_CTX,
7751 	},
7752 };
7753 
7754 static const struct bpf_reg_types spin_lock_types = {
7755 	.types = {
7756 		PTR_TO_MAP_VALUE,
7757 		PTR_TO_BTF_ID | MEM_ALLOC,
7758 	}
7759 };
7760 
7761 static const struct bpf_reg_types fullsock_types = { .types = { PTR_TO_SOCKET } };
7762 static const struct bpf_reg_types scalar_types = { .types = { SCALAR_VALUE } };
7763 static const struct bpf_reg_types context_types = { .types = { PTR_TO_CTX } };
7764 static const struct bpf_reg_types ringbuf_mem_types = { .types = { PTR_TO_MEM | MEM_RINGBUF } };
7765 static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_TO_MAP } };
7766 static const struct bpf_reg_types btf_ptr_types = {
7767 	.types = {
7768 		PTR_TO_BTF_ID,
7769 		PTR_TO_BTF_ID | PTR_TRUSTED,
7770 		PTR_TO_BTF_ID | MEM_RCU,
7771 	},
7772 };
7773 static const struct bpf_reg_types percpu_btf_ptr_types = {
7774 	.types = {
7775 		PTR_TO_BTF_ID | MEM_PERCPU,
7776 		PTR_TO_BTF_ID | MEM_PERCPU | MEM_RCU,
7777 		PTR_TO_BTF_ID | MEM_PERCPU | PTR_TRUSTED,
7778 	}
7779 };
7780 static const struct bpf_reg_types func_ptr_types = { .types = { PTR_TO_FUNC } };
7781 static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } };
7782 static const struct bpf_reg_types const_str_ptr_types = { .types = { PTR_TO_MAP_VALUE } };
7783 static const struct bpf_reg_types timer_types = { .types = { PTR_TO_MAP_VALUE } };
7784 static const struct bpf_reg_types kptr_xchg_dest_types = {
7785 	.types = {
7786 		PTR_TO_MAP_VALUE,
7787 		PTR_TO_BTF_ID | MEM_ALLOC,
7788 		PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF,
7789 		PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF | MEM_RCU,
7790 	}
7791 };
7792 static const struct bpf_reg_types dynptr_types = {
7793 	.types = {
7794 		PTR_TO_STACK,
7795 		CONST_PTR_TO_DYNPTR,
7796 	}
7797 };
7798 
7799 static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
7800 	[ARG_PTR_TO_MAP_KEY]		= &mem_types,
7801 	[ARG_PTR_TO_MAP_VALUE]		= &mem_types,
7802 	[ARG_CONST_SIZE]		= &scalar_types,
7803 	[ARG_CONST_SIZE_OR_ZERO]	= &scalar_types,
7804 	[ARG_CONST_ALLOC_SIZE_OR_ZERO]	= &scalar_types,
7805 	[ARG_CONST_MAP_PTR]		= &const_map_ptr_types,
7806 	[ARG_PTR_TO_CTX]		= &context_types,
7807 	[ARG_PTR_TO_SOCK_COMMON]	= &sock_types,
7808 #ifdef CONFIG_NET
7809 	[ARG_PTR_TO_BTF_ID_SOCK_COMMON]	= &btf_id_sock_common_types,
7810 #endif
7811 	[ARG_PTR_TO_SOCKET]		= &fullsock_types,
7812 	[ARG_PTR_TO_BTF_ID]		= &btf_ptr_types,
7813 	[ARG_PTR_TO_SPIN_LOCK]		= &spin_lock_types,
7814 	[ARG_PTR_TO_MEM]		= &mem_types,
7815 	[ARG_PTR_TO_RINGBUF_MEM]	= &ringbuf_mem_types,
7816 	[ARG_PTR_TO_PERCPU_BTF_ID]	= &percpu_btf_ptr_types,
7817 	[ARG_PTR_TO_FUNC]		= &func_ptr_types,
7818 	[ARG_PTR_TO_STACK]		= &stack_ptr_types,
7819 	[ARG_PTR_TO_CONST_STR]		= &const_str_ptr_types,
7820 	[ARG_PTR_TO_TIMER]		= &timer_types,
7821 	[ARG_KPTR_XCHG_DEST]		= &kptr_xchg_dest_types,
7822 	[ARG_PTR_TO_DYNPTR]		= &dynptr_types,
7823 };
7824 
7825 static int check_reg_type(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno,
7826 			  enum bpf_arg_type arg_type,
7827 			  const u32 *arg_btf_id,
7828 			  struct bpf_call_arg_meta *meta)
7829 {
7830 	enum bpf_reg_type expected, type = reg->type;
7831 	const struct bpf_reg_types *compatible;
7832 	int i, j, err;
7833 
7834 	compatible = compatible_reg_types[base_type(arg_type)];
7835 	if (!compatible) {
7836 		verifier_bug(env, "unsupported arg type %d", arg_type);
7837 		return -EFAULT;
7838 	}
7839 
7840 	/* ARG_PTR_TO_MEM + RDONLY is compatible with PTR_TO_MEM and PTR_TO_MEM + RDONLY,
7841 	 * but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM and NOT with PTR_TO_MEM + RDONLY
7842 	 *
7843 	 * Same for MAYBE_NULL:
7844 	 *
7845 	 * ARG_PTR_TO_MEM + MAYBE_NULL is compatible with PTR_TO_MEM and PTR_TO_MEM + MAYBE_NULL,
7846 	 * but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM but NOT with PTR_TO_MEM + MAYBE_NULL
7847 	 *
7848 	 * ARG_PTR_TO_MEM is compatible with PTR_TO_MEM that is tagged with a dynptr type.
7849 	 *
7850 	 * Therefore we fold these flags depending on the arg_type before comparison.
7851 	 */
7852 	if (arg_type & MEM_RDONLY)
7853 		type &= ~MEM_RDONLY;
7854 	if (arg_type & PTR_MAYBE_NULL)
7855 		type &= ~PTR_MAYBE_NULL;
7856 	if (base_type(arg_type) == ARG_PTR_TO_MEM)
7857 		type &= ~DYNPTR_TYPE_FLAG_MASK;
7858 
7859 	/* Local kptr types are allowed as the source argument of bpf_kptr_xchg */
7860 	if (meta->func_id == BPF_FUNC_kptr_xchg && type_is_alloc(type) && reg_from_argno(argno) == BPF_REG_2) {
7861 		type &= ~MEM_ALLOC;
7862 		type &= ~MEM_PERCPU;
7863 	}
7864 
7865 	for (i = 0; i < ARRAY_SIZE(compatible->types); i++) {
7866 		expected = compatible->types[i];
7867 		if (expected == NOT_INIT)
7868 			break;
7869 
7870 		if (type == expected)
7871 			goto found;
7872 	}
7873 
7874 	verbose(env, "%s type=%s expected=", reg_arg_name(env, argno), reg_type_str(env, reg->type));
7875 	for (j = 0; j + 1 < i; j++)
7876 		verbose(env, "%s, ", reg_type_str(env, compatible->types[j]));
7877 	verbose(env, "%s\n", reg_type_str(env, compatible->types[j]));
7878 	return -EACCES;
7879 
7880 found:
7881 	if (base_type(reg->type) != PTR_TO_BTF_ID)
7882 		return 0;
7883 
7884 	if (compatible == &mem_types) {
7885 		if (!(arg_type & MEM_RDONLY)) {
7886 			verbose(env,
7887 				"%s() may write into memory pointed by %s type=%s\n",
7888 				func_id_name(meta->func_id),
7889 				reg_arg_name(env, argno), reg_type_str(env, reg->type));
7890 			return -EACCES;
7891 		}
7892 		return 0;
7893 	}
7894 
7895 	switch ((int)reg->type) {
7896 	case PTR_TO_BTF_ID:
7897 	case PTR_TO_BTF_ID | PTR_TRUSTED:
7898 	case PTR_TO_BTF_ID | PTR_TRUSTED | PTR_MAYBE_NULL:
7899 	case PTR_TO_BTF_ID | MEM_RCU:
7900 	case PTR_TO_BTF_ID | PTR_MAYBE_NULL:
7901 	case PTR_TO_BTF_ID | PTR_MAYBE_NULL | MEM_RCU:
7902 	{
7903 		/* For bpf_sk_release, it needs to match against first member
7904 		 * 'struct sock_common', hence make an exception for it. This
7905 		 * allows bpf_sk_release to work for multiple socket types.
7906 		 */
7907 		bool strict_type_match = arg_type_is_release(arg_type) &&
7908 					 meta->func_id != BPF_FUNC_sk_release;
7909 
7910 		if (type_may_be_null(reg->type) &&
7911 		    (!type_may_be_null(arg_type) || arg_type_is_release(arg_type))) {
7912 			verbose(env, "Possibly NULL pointer passed to helper %s\n",
7913 				reg_arg_name(env, argno));
7914 			return -EACCES;
7915 		}
7916 
7917 		if (!arg_btf_id) {
7918 			if (!compatible->btf_id) {
7919 				verifier_bug(env, "missing arg compatible BTF ID");
7920 				return -EFAULT;
7921 			}
7922 			arg_btf_id = compatible->btf_id;
7923 		}
7924 
7925 		if (meta->func_id == BPF_FUNC_kptr_xchg) {
7926 			if (map_kptr_match_type(env, meta->kptr_field, reg, reg_from_argno(argno)))
7927 				return -EACCES;
7928 		} else {
7929 			if (arg_btf_id == BPF_PTR_POISON) {
7930 				verbose(env, "verifier internal error:");
7931 				verbose(env, "%s has non-overwritten BPF_PTR_POISON type\n",
7932 					reg_arg_name(env, argno));
7933 				return -EACCES;
7934 			}
7935 
7936 			err = __check_ptr_off_reg(env, reg, argno, true);
7937 			if (err)
7938 				return err;
7939 
7940 			if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id,
7941 						  reg->var_off.value, btf_vmlinux, *arg_btf_id,
7942 						  strict_type_match)) {
7943 				verbose(env, "%s is of type %s but %s is expected\n",
7944 					reg_arg_name(env, argno),
7945 					btf_type_name(reg->btf, reg->btf_id),
7946 					btf_type_name(btf_vmlinux, *arg_btf_id));
7947 				return -EACCES;
7948 			}
7949 		}
7950 		break;
7951 	}
7952 	case PTR_TO_BTF_ID | MEM_ALLOC:
7953 	case PTR_TO_BTF_ID | MEM_PERCPU | MEM_ALLOC:
7954 	case PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF:
7955 	case PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF | MEM_RCU:
7956 		if (meta->func_id != BPF_FUNC_spin_lock && meta->func_id != BPF_FUNC_spin_unlock &&
7957 		    meta->func_id != BPF_FUNC_kptr_xchg) {
7958 			verifier_bug(env, "unimplemented handling of MEM_ALLOC");
7959 			return -EFAULT;
7960 		}
7961 		/* Check if local kptr in src arg matches kptr in dst arg */
7962 		if (meta->func_id == BPF_FUNC_kptr_xchg) {
7963 			int regno = reg_from_argno(argno);
7964 
7965 			if (regno == BPF_REG_2 &&
7966 			    map_kptr_match_type(env, meta->kptr_field, reg, regno))
7967 				return -EACCES;
7968 		}
7969 		break;
7970 	case PTR_TO_BTF_ID | MEM_PERCPU:
7971 	case PTR_TO_BTF_ID | MEM_PERCPU | MEM_RCU:
7972 	case PTR_TO_BTF_ID | MEM_PERCPU | PTR_TRUSTED:
7973 		/* Handled by helper specific checks */
7974 		break;
7975 	default:
7976 		verifier_bug(env, "invalid PTR_TO_BTF_ID register for type match");
7977 		return -EFAULT;
7978 	}
7979 	return 0;
7980 }
7981 
7982 static struct btf_field *
7983 reg_find_field_offset(const struct bpf_reg_state *reg, s32 off, u32 fields)
7984 {
7985 	struct btf_field *field;
7986 	struct btf_record *rec;
7987 
7988 	rec = reg_btf_record(reg);
7989 	if (!rec)
7990 		return NULL;
7991 
7992 	field = btf_record_find(rec, off, fields);
7993 	if (!field)
7994 		return NULL;
7995 
7996 	return field;
7997 }
7998 
7999 static int check_func_arg_reg_off(struct bpf_verifier_env *env,
8000 				  const struct bpf_reg_state *reg, argno_t argno,
8001 				  enum bpf_arg_type arg_type)
8002 {
8003 	u32 type = reg->type;
8004 
8005 	/* When referenced register is passed to release function, its fixed
8006 	 * offset must be 0.
8007 	 *
8008 	 * We will check arg_type_is_release reg has id when storing
8009 	 * meta->release_regno.
8010 	 */
8011 	if (arg_type_is_release(arg_type)) {
8012 		/* ARG_PTR_TO_DYNPTR with OBJ_RELEASE is a bit special, as it
8013 		 * may not directly point to the object being released, but to
8014 		 * dynptr pointing to such object, which might be at some offset
8015 		 * on the stack. In that case, we simply to fallback to the
8016 		 * default handling.
8017 		 */
8018 		if (arg_type_is_dynptr(arg_type) && type == PTR_TO_STACK)
8019 			return 0;
8020 
8021 		/* Doing check_ptr_off_reg check for the offset will catch this
8022 		 * because fixed_off_ok is false, but checking here allows us
8023 		 * to give the user a better error message.
8024 		 */
8025 		if (!tnum_is_const(reg->var_off) || reg->var_off.value != 0) {
8026 			verbose(env, "%s must have zero offset when passed to release func or trusted arg to kfunc\n",
8027 				reg_arg_name(env, argno));
8028 			return -EINVAL;
8029 		}
8030 	}
8031 
8032 	switch (type) {
8033 	/* Pointer types where both fixed and variable offset is explicitly allowed: */
8034 	case PTR_TO_STACK:
8035 	case PTR_TO_PACKET:
8036 	case PTR_TO_PACKET_META:
8037 	case PTR_TO_MAP_KEY:
8038 	case PTR_TO_MAP_VALUE:
8039 	case PTR_TO_MEM:
8040 	case PTR_TO_MEM | MEM_RDONLY:
8041 	case PTR_TO_MEM | MEM_RINGBUF:
8042 	case PTR_TO_BUF:
8043 	case PTR_TO_BUF | MEM_RDONLY:
8044 	case PTR_TO_ARENA:
8045 	case SCALAR_VALUE:
8046 		return 0;
8047 	/* All the rest must be rejected, except PTR_TO_BTF_ID which allows
8048 	 * fixed offset.
8049 	 */
8050 	case PTR_TO_BTF_ID:
8051 	case PTR_TO_BTF_ID | MEM_ALLOC:
8052 	case PTR_TO_BTF_ID | PTR_TRUSTED:
8053 	case PTR_TO_BTF_ID | MEM_RCU:
8054 	case PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF:
8055 	case PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF | MEM_RCU:
8056 		/* When referenced PTR_TO_BTF_ID is passed to release function,
8057 		 * its fixed offset must be 0. In the other cases, fixed offset
8058 		 * can be non-zero. This was already checked above. So pass
8059 		 * fixed_off_ok as true to allow fixed offset for all other
8060 		 * cases. var_off always must be 0 for PTR_TO_BTF_ID, hence we
8061 		 * still need to do checks instead of returning.
8062 		 */
8063 		return __check_ptr_off_reg(env, reg, argno, true);
8064 	case PTR_TO_CTX:
8065 		/*
8066 		 * Allow fixed and variable offsets for syscall context, but
8067 		 * only when the argument is passed as memory, not ctx,
8068 		 * otherwise we may get modified ctx in tail called programs and
8069 		 * global subprogs (that may act as extension prog hooks).
8070 		 */
8071 		if (arg_type != ARG_PTR_TO_CTX && is_var_ctx_off_allowed(env->prog))
8072 			return 0;
8073 		fallthrough;
8074 	default:
8075 		return __check_ptr_off_reg(env, reg, argno, false);
8076 	}
8077 }
8078 
8079 static int check_arg_const_str(struct bpf_verifier_env *env,
8080 			       struct bpf_reg_state *reg, argno_t argno)
8081 {
8082 	struct bpf_map *map = reg->map_ptr;
8083 	int err;
8084 	int map_off;
8085 	u64 map_addr;
8086 	char *str_ptr;
8087 
8088 	if (reg->type != PTR_TO_MAP_VALUE)
8089 		return -EINVAL;
8090 
8091 	if (map->map_type == BPF_MAP_TYPE_INSN_ARRAY) {
8092 		verbose(env, "%s points to insn_array map which cannot be used as const string\n",
8093 			reg_arg_name(env, argno));
8094 		return -EACCES;
8095 	}
8096 
8097 	if (!bpf_map_is_rdonly(map)) {
8098 		verbose(env, "%s does not point to a readonly map'\n", reg_arg_name(env, argno));
8099 		return -EACCES;
8100 	}
8101 
8102 	if (!tnum_is_const(reg->var_off)) {
8103 		verbose(env, "%s is not a constant address'\n", reg_arg_name(env, argno));
8104 		return -EACCES;
8105 	}
8106 
8107 	if (!map->ops->map_direct_value_addr) {
8108 		verbose(env, "no direct value access support for this map type\n");
8109 		return -EACCES;
8110 	}
8111 
8112 	err = check_map_access(env, reg, argno, 0,
8113 			       map->value_size - reg->var_off.value, false,
8114 			       ACCESS_HELPER);
8115 	if (err)
8116 		return err;
8117 
8118 	map_off = reg->var_off.value;
8119 	err = map->ops->map_direct_value_addr(map, &map_addr, map_off);
8120 	if (err) {
8121 		verbose(env, "direct value access on string failed\n");
8122 		return err;
8123 	}
8124 
8125 	str_ptr = (char *)(long)(map_addr);
8126 	if (!strnchr(str_ptr + map_off, map->value_size - map_off, 0)) {
8127 		verbose(env, "string is not zero-terminated\n");
8128 		return -EINVAL;
8129 	}
8130 	return 0;
8131 }
8132 
8133 /* Returns constant key value in `value` if possible, else negative error */
8134 static int get_constant_map_key(struct bpf_verifier_env *env,
8135 				struct bpf_reg_state *key,
8136 				u32 key_size,
8137 				s64 *value)
8138 {
8139 	struct bpf_func_state *state = bpf_func(env, key);
8140 	struct bpf_reg_state *reg;
8141 	int slot, spi, off;
8142 	int spill_size = 0;
8143 	int zero_size = 0;
8144 	int stack_off;
8145 	int i, err;
8146 	u8 *stype;
8147 
8148 	if (!env->bpf_capable)
8149 		return -EOPNOTSUPP;
8150 	if (key->type != PTR_TO_STACK)
8151 		return -EOPNOTSUPP;
8152 	if (!tnum_is_const(key->var_off))
8153 		return -EOPNOTSUPP;
8154 
8155 	stack_off = key->var_off.value;
8156 	slot = -stack_off - 1;
8157 	spi = slot / BPF_REG_SIZE;
8158 	off = slot % BPF_REG_SIZE;
8159 	stype = state->stack[spi].slot_type;
8160 
8161 	/* First handle precisely tracked STACK_ZERO */
8162 	for (i = off; i >= 0 && stype[i] == STACK_ZERO; i--)
8163 		zero_size++;
8164 	if (zero_size >= key_size) {
8165 		*value = 0;
8166 		return 0;
8167 	}
8168 
8169 	/* Check that stack contains a scalar spill of expected size */
8170 	if (!bpf_is_spilled_scalar_reg(&state->stack[spi]))
8171 		return -EOPNOTSUPP;
8172 	for (i = off; i >= 0 && stype[i] == STACK_SPILL; i--)
8173 		spill_size++;
8174 	if (spill_size != key_size)
8175 		return -EOPNOTSUPP;
8176 
8177 	reg = &state->stack[spi].spilled_ptr;
8178 	if (!tnum_is_const(reg->var_off))
8179 		/* Stack value not statically known */
8180 		return -EOPNOTSUPP;
8181 
8182 	/* We are relying on a constant value. So mark as precise
8183 	 * to prevent pruning on it.
8184 	 */
8185 	bpf_bt_set_frame_slot(&env->bt, key->frameno, spi);
8186 	err = mark_chain_precision_batch(env, env->cur_state);
8187 	if (err < 0)
8188 		return err;
8189 
8190 	*value = reg->var_off.value;
8191 	return 0;
8192 }
8193 
8194 static bool can_elide_value_nullness(const struct bpf_map *map);
8195 
8196 static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
8197 			  struct bpf_call_arg_meta *meta,
8198 			  const struct bpf_func_proto *fn,
8199 			  int insn_idx)
8200 {
8201 	u32 regno = BPF_REG_1 + arg;
8202 	struct bpf_reg_state *reg = reg_state(env, regno);
8203 	enum bpf_arg_type arg_type = fn->arg_type[arg];
8204 	argno_t argno = argno_from_arg(arg + 1);
8205 	enum bpf_reg_type type = reg->type;
8206 	u32 *arg_btf_id = NULL;
8207 	u32 key_size;
8208 	int err = 0;
8209 
8210 	if (arg_type == ARG_DONTCARE)
8211 		return 0;
8212 
8213 	err = check_reg_arg(env, regno, SRC_OP);
8214 	if (err)
8215 		return err;
8216 
8217 	if (arg_type == ARG_ANYTHING) {
8218 		if (is_pointer_value(env, regno)) {
8219 			verbose(env, "R%d leaks addr into helper function\n",
8220 				regno);
8221 			return -EACCES;
8222 		}
8223 		return 0;
8224 	}
8225 
8226 	if (type_is_pkt_pointer(type) &&
8227 	    !may_access_direct_pkt_data(env, meta, BPF_READ)) {
8228 		verbose(env, "helper access to the packet is not allowed\n");
8229 		return -EACCES;
8230 	}
8231 
8232 	if (base_type(arg_type) == ARG_PTR_TO_MAP_VALUE) {
8233 		err = resolve_map_arg_type(env, meta, &arg_type);
8234 		if (err)
8235 			return err;
8236 	}
8237 
8238 	if (bpf_register_is_null(reg) && type_may_be_null(arg_type))
8239 		/* A NULL register has a SCALAR_VALUE type, so skip
8240 		 * type checking.
8241 		 */
8242 		goto skip_type_check;
8243 
8244 	/* arg_btf_id and arg_size are in a union. */
8245 	if (base_type(arg_type) == ARG_PTR_TO_BTF_ID ||
8246 	    base_type(arg_type) == ARG_PTR_TO_SPIN_LOCK)
8247 		arg_btf_id = fn->arg_btf_id[arg];
8248 
8249 	err = check_reg_type(env, reg, argno_from_reg(regno), arg_type, arg_btf_id, meta);
8250 	if (err)
8251 		return err;
8252 
8253 	err = check_func_arg_reg_off(env, reg, argno_from_reg(regno), arg_type);
8254 	if (err)
8255 		return err;
8256 
8257 skip_type_check:
8258 	if (arg_type_is_release(arg_type) && !arg_type_is_dynptr(arg_type) &&
8259 	    !reg_is_referenced(env, reg) && !bpf_register_is_null(reg)) {
8260 		verbose(env, "release helper %s expects referenced PTR_TO_BTF_ID passed to %s\n",
8261 			func_id_name(meta->func_id), reg_arg_name(env, argno));
8262 		return -EINVAL;
8263 	}
8264 
8265 	if (reg_is_referenced(env, reg))
8266 		update_ref_obj(&meta->ref_obj, reg);
8267 
8268 	switch (base_type(arg_type)) {
8269 	case ARG_CONST_MAP_PTR:
8270 		/* bpf_map_xxx(map_ptr) call: remember that map_ptr */
8271 		if (meta->map.ptr) {
8272 			/* Use map_uid (which is unique id of inner map) to reject:
8273 			 * inner_map1 = bpf_map_lookup_elem(outer_map, key1)
8274 			 * inner_map2 = bpf_map_lookup_elem(outer_map, key2)
8275 			 * if (inner_map1 && inner_map2) {
8276 			 *     timer = bpf_map_lookup_elem(inner_map1);
8277 			 *     if (timer)
8278 			 *         // mismatch would have been allowed
8279 			 *         bpf_timer_init(timer, inner_map2);
8280 			 * }
8281 			 *
8282 			 * Comparing map_ptr is enough to distinguish normal and outer maps.
8283 			 */
8284 			if (meta->map.ptr != reg->map_ptr ||
8285 			    meta->map.uid != reg->map_uid) {
8286 				verbose(env,
8287 					"timer pointer in R1 map_uid=%d doesn't match map pointer in R2 map_uid=%d\n",
8288 					meta->map.uid, reg->map_uid);
8289 				return -EINVAL;
8290 			}
8291 		}
8292 		meta->map.ptr = reg->map_ptr;
8293 		meta->map.uid = reg->map_uid;
8294 		break;
8295 	case ARG_PTR_TO_MAP_KEY:
8296 		/* bpf_map_xxx(..., map_ptr, ..., key) call:
8297 		 * check that [key, key + map->key_size) are within
8298 		 * stack limits and initialized
8299 		 */
8300 		if (!meta->map.ptr) {
8301 			/* in function declaration map_ptr must come before
8302 			 * map_key, so that it's verified and known before
8303 			 * we have to check map_key here. Otherwise it means
8304 			 * that kernel subsystem misconfigured verifier
8305 			 */
8306 			verifier_bug(env, "invalid map_ptr to access map->key");
8307 			return -EFAULT;
8308 		}
8309 		key_size = meta->map.ptr->key_size;
8310 		err = check_helper_mem_access(env, reg, argno_from_reg(regno), key_size, BPF_READ, false, NULL);
8311 		if (err)
8312 			return err;
8313 		if (can_elide_value_nullness(meta->map.ptr)) {
8314 			err = get_constant_map_key(env, reg, key_size, &meta->const_map_key);
8315 			if (err < 0) {
8316 				meta->const_map_key = -1;
8317 				if (err == -EOPNOTSUPP)
8318 					err = 0;
8319 				else
8320 					return err;
8321 			}
8322 		}
8323 		break;
8324 	case ARG_PTR_TO_MAP_VALUE:
8325 		if (type_may_be_null(arg_type) && bpf_register_is_null(reg))
8326 			return 0;
8327 
8328 		/* bpf_map_xxx(..., map_ptr, ..., value) call:
8329 		 * check [value, value + map->value_size) validity
8330 		 */
8331 		if (!meta->map.ptr) {
8332 			/* kernel subsystem misconfigured verifier */
8333 			verifier_bug(env, "invalid map_ptr to access map->value");
8334 			return -EFAULT;
8335 		}
8336 		meta->raw_mode = arg_type & MEM_UNINIT;
8337 		err = check_helper_mem_access(env, reg, argno_from_reg(regno), meta->map.ptr->value_size,
8338 					      arg_type & MEM_WRITE ? BPF_WRITE : BPF_READ,
8339 					      false, meta);
8340 		break;
8341 	case ARG_PTR_TO_PERCPU_BTF_ID:
8342 		if (!reg->btf_id) {
8343 			verbose(env, "Helper has invalid btf_id in R%d\n", regno);
8344 			return -EACCES;
8345 		}
8346 		meta->ret_btf = reg->btf;
8347 		meta->ret_btf_id = reg->btf_id;
8348 		break;
8349 	case ARG_PTR_TO_SPIN_LOCK:
8350 		if (in_rbtree_lock_required_cb(env)) {
8351 			verbose(env, "can't spin_{lock,unlock} in rbtree cb\n");
8352 			return -EACCES;
8353 		}
8354 		if (meta->func_id == BPF_FUNC_spin_lock) {
8355 			err = process_spin_lock(env, reg, argno_from_reg(regno), PROCESS_SPIN_LOCK);
8356 			if (err)
8357 				return err;
8358 		} else if (meta->func_id == BPF_FUNC_spin_unlock) {
8359 			err = process_spin_lock(env, reg, argno_from_reg(regno), 0);
8360 			if (err)
8361 				return err;
8362 		} else {
8363 			verifier_bug(env, "spin lock arg on unexpected helper");
8364 			return -EFAULT;
8365 		}
8366 		break;
8367 	case ARG_PTR_TO_TIMER:
8368 		err = process_timer_helper(env, reg, argno_from_reg(regno), meta);
8369 		if (err)
8370 			return err;
8371 		break;
8372 	case ARG_PTR_TO_FUNC:
8373 		meta->subprogno = reg->subprogno;
8374 		break;
8375 	case ARG_PTR_TO_MEM:
8376 		/* The access to this pointer is only checked when we hit the
8377 		 * next is_mem_size argument below.
8378 		 */
8379 		meta->raw_mode = arg_type & MEM_UNINIT;
8380 		if (arg_type & MEM_FIXED_SIZE) {
8381 			err = check_helper_mem_access(env, reg, argno_from_reg(regno), fn->arg_size[arg],
8382 						      arg_type & MEM_WRITE ? BPF_WRITE : BPF_READ,
8383 						      false, meta);
8384 			if (err)
8385 				return err;
8386 			if (arg_type & MEM_ALIGNED)
8387 				err = check_ptr_alignment(env, reg, 0, fn->arg_size[arg], true);
8388 		}
8389 		break;
8390 	case ARG_CONST_SIZE:
8391 		err = check_mem_size_reg(env, reg_state(env, regno - 1), reg, argno_from_reg(regno - 1),
8392 					 argno_from_reg(regno),
8393 					 fn->arg_type[arg - 1] & MEM_WRITE ?
8394 					 BPF_WRITE : BPF_READ,
8395 					 false, meta);
8396 		break;
8397 	case ARG_CONST_SIZE_OR_ZERO:
8398 		err = check_mem_size_reg(env, reg_state(env, regno - 1), reg, argno_from_reg(regno - 1),
8399 					 argno_from_reg(regno),
8400 					 fn->arg_type[arg - 1] & MEM_WRITE ?
8401 					 BPF_WRITE : BPF_READ,
8402 					 true, meta);
8403 		break;
8404 	case ARG_PTR_TO_DYNPTR:
8405 		err = process_dynptr_func(env, reg, argno_from_reg(regno), insn_idx, arg_type, &meta->ref_obj,
8406 					  &meta->dynptr);
8407 		if (err)
8408 			return err;
8409 		break;
8410 	case ARG_CONST_ALLOC_SIZE_OR_ZERO:
8411 		if (!tnum_is_const(reg->var_off)) {
8412 			verbose(env, "R%d is not a known constant'\n",
8413 				regno);
8414 			return -EACCES;
8415 		}
8416 		meta->mem_size = reg->var_off.value;
8417 		err = mark_chain_precision(env, regno);
8418 		if (err)
8419 			return err;
8420 		break;
8421 	case ARG_PTR_TO_CONST_STR:
8422 	{
8423 		err = check_arg_const_str(env, reg, argno_from_reg(regno));
8424 		if (err)
8425 			return err;
8426 		break;
8427 	}
8428 	case ARG_KPTR_XCHG_DEST:
8429 		err = process_kptr_func(env, regno, meta);
8430 		if (err)
8431 			return err;
8432 		break;
8433 	}
8434 
8435 	return err;
8436 }
8437 
8438 static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id)
8439 {
8440 	enum bpf_attach_type eatype = env->prog->expected_attach_type;
8441 	enum bpf_prog_type type = resolve_prog_type(env->prog);
8442 
8443 	if (func_id != BPF_FUNC_map_update_elem &&
8444 	    func_id != BPF_FUNC_map_delete_elem)
8445 		return false;
8446 
8447 	/* It's not possible to get access to a locked struct sock in these
8448 	 * contexts, so updating is safe.
8449 	 */
8450 	switch (type) {
8451 	case BPF_PROG_TYPE_TRACING:
8452 		if (eatype == BPF_TRACE_ITER)
8453 			return true;
8454 		break;
8455 	case BPF_PROG_TYPE_SOCK_OPS:
8456 		/* map_update allowed only via dedicated helpers with event type checks */
8457 		if (func_id == BPF_FUNC_map_delete_elem)
8458 			return true;
8459 		break;
8460 	case BPF_PROG_TYPE_SOCKET_FILTER:
8461 	case BPF_PROG_TYPE_SCHED_CLS:
8462 	case BPF_PROG_TYPE_SCHED_ACT:
8463 	case BPF_PROG_TYPE_XDP:
8464 	case BPF_PROG_TYPE_SK_REUSEPORT:
8465 	case BPF_PROG_TYPE_FLOW_DISSECTOR:
8466 	case BPF_PROG_TYPE_SK_LOOKUP:
8467 		return true;
8468 	default:
8469 		break;
8470 	}
8471 
8472 	verbose(env, "cannot update sockmap in this context\n");
8473 	return false;
8474 }
8475 
8476 bool bpf_allow_tail_call_in_subprogs(struct bpf_verifier_env *env)
8477 {
8478 	return env->prog->jit_requested &&
8479 	       bpf_jit_supports_subprog_tailcalls();
8480 }
8481 
8482 static int check_map_func_compatibility(struct bpf_verifier_env *env,
8483 					struct bpf_map *map, int func_id)
8484 {
8485 	if (!map)
8486 		return 0;
8487 
8488 	/* We need a two way check, first is from map perspective ... */
8489 	switch (map->map_type) {
8490 	case BPF_MAP_TYPE_PROG_ARRAY:
8491 		if (func_id != BPF_FUNC_tail_call)
8492 			goto error;
8493 		break;
8494 	case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
8495 		if (func_id != BPF_FUNC_perf_event_read &&
8496 		    func_id != BPF_FUNC_perf_event_output &&
8497 		    func_id != BPF_FUNC_skb_output &&
8498 		    func_id != BPF_FUNC_perf_event_read_value &&
8499 		    func_id != BPF_FUNC_xdp_output)
8500 			goto error;
8501 		break;
8502 	case BPF_MAP_TYPE_RINGBUF:
8503 		if (func_id != BPF_FUNC_ringbuf_output &&
8504 		    func_id != BPF_FUNC_ringbuf_reserve &&
8505 		    func_id != BPF_FUNC_ringbuf_query &&
8506 		    func_id != BPF_FUNC_ringbuf_reserve_dynptr &&
8507 		    func_id != BPF_FUNC_ringbuf_submit_dynptr &&
8508 		    func_id != BPF_FUNC_ringbuf_discard_dynptr)
8509 			goto error;
8510 		break;
8511 	case BPF_MAP_TYPE_USER_RINGBUF:
8512 		if (func_id != BPF_FUNC_user_ringbuf_drain)
8513 			goto error;
8514 		break;
8515 	case BPF_MAP_TYPE_STACK_TRACE:
8516 		if (func_id != BPF_FUNC_get_stackid)
8517 			goto error;
8518 		break;
8519 	case BPF_MAP_TYPE_CGROUP_ARRAY:
8520 		if (func_id != BPF_FUNC_skb_under_cgroup &&
8521 		    func_id != BPF_FUNC_current_task_under_cgroup)
8522 			goto error;
8523 		break;
8524 	case BPF_MAP_TYPE_CGROUP_STORAGE:
8525 	case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE:
8526 		if (func_id != BPF_FUNC_get_local_storage)
8527 			goto error;
8528 		break;
8529 	case BPF_MAP_TYPE_DEVMAP:
8530 	case BPF_MAP_TYPE_DEVMAP_HASH:
8531 		if (func_id != BPF_FUNC_redirect_map &&
8532 		    func_id != BPF_FUNC_map_lookup_elem)
8533 			goto error;
8534 		break;
8535 	/* Restrict bpf side of cpumap and xskmap, open when use-cases
8536 	 * appear.
8537 	 */
8538 	case BPF_MAP_TYPE_CPUMAP:
8539 		if (func_id != BPF_FUNC_redirect_map)
8540 			goto error;
8541 		break;
8542 	case BPF_MAP_TYPE_XSKMAP:
8543 		if (func_id != BPF_FUNC_redirect_map &&
8544 		    func_id != BPF_FUNC_map_lookup_elem)
8545 			goto error;
8546 		break;
8547 	case BPF_MAP_TYPE_ARRAY_OF_MAPS:
8548 	case BPF_MAP_TYPE_HASH_OF_MAPS:
8549 		if (func_id != BPF_FUNC_map_lookup_elem)
8550 			goto error;
8551 		break;
8552 	case BPF_MAP_TYPE_SOCKMAP:
8553 		if (func_id != BPF_FUNC_sk_redirect_map &&
8554 		    func_id != BPF_FUNC_sock_map_update &&
8555 		    func_id != BPF_FUNC_msg_redirect_map &&
8556 		    func_id != BPF_FUNC_sk_select_reuseport &&
8557 		    func_id != BPF_FUNC_map_lookup_elem &&
8558 		    !may_update_sockmap(env, func_id))
8559 			goto error;
8560 		break;
8561 	case BPF_MAP_TYPE_SOCKHASH:
8562 		if (func_id != BPF_FUNC_sk_redirect_hash &&
8563 		    func_id != BPF_FUNC_sock_hash_update &&
8564 		    func_id != BPF_FUNC_msg_redirect_hash &&
8565 		    func_id != BPF_FUNC_sk_select_reuseport &&
8566 		    func_id != BPF_FUNC_map_lookup_elem &&
8567 		    !may_update_sockmap(env, func_id))
8568 			goto error;
8569 		break;
8570 	case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
8571 		if (func_id != BPF_FUNC_sk_select_reuseport)
8572 			goto error;
8573 		break;
8574 	case BPF_MAP_TYPE_QUEUE:
8575 	case BPF_MAP_TYPE_STACK:
8576 		if (func_id != BPF_FUNC_map_peek_elem &&
8577 		    func_id != BPF_FUNC_map_pop_elem &&
8578 		    func_id != BPF_FUNC_map_push_elem)
8579 			goto error;
8580 		break;
8581 	case BPF_MAP_TYPE_SK_STORAGE:
8582 		if (func_id != BPF_FUNC_sk_storage_get &&
8583 		    func_id != BPF_FUNC_sk_storage_delete &&
8584 		    func_id != BPF_FUNC_kptr_xchg)
8585 			goto error;
8586 		break;
8587 	case BPF_MAP_TYPE_INODE_STORAGE:
8588 		if (func_id != BPF_FUNC_inode_storage_get &&
8589 		    func_id != BPF_FUNC_inode_storage_delete &&
8590 		    func_id != BPF_FUNC_kptr_xchg)
8591 			goto error;
8592 		break;
8593 	case BPF_MAP_TYPE_TASK_STORAGE:
8594 		if (func_id != BPF_FUNC_task_storage_get &&
8595 		    func_id != BPF_FUNC_task_storage_delete &&
8596 		    func_id != BPF_FUNC_kptr_xchg)
8597 			goto error;
8598 		break;
8599 	case BPF_MAP_TYPE_CGRP_STORAGE:
8600 		if (func_id != BPF_FUNC_cgrp_storage_get &&
8601 		    func_id != BPF_FUNC_cgrp_storage_delete &&
8602 		    func_id != BPF_FUNC_kptr_xchg)
8603 			goto error;
8604 		break;
8605 	case BPF_MAP_TYPE_BLOOM_FILTER:
8606 		if (func_id != BPF_FUNC_map_peek_elem &&
8607 		    func_id != BPF_FUNC_map_push_elem)
8608 			goto error;
8609 		break;
8610 	case BPF_MAP_TYPE_INSN_ARRAY:
8611 		goto error;
8612 	default:
8613 		break;
8614 	}
8615 
8616 	/* ... and second from the function itself. */
8617 	switch (func_id) {
8618 	case BPF_FUNC_tail_call:
8619 		if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
8620 			goto error;
8621 		if (env->subprog_cnt > 1 && !bpf_allow_tail_call_in_subprogs(env)) {
8622 			verbose(env, "mixing of tail_calls and bpf-to-bpf calls is not supported\n");
8623 			return -EINVAL;
8624 		}
8625 		break;
8626 	case BPF_FUNC_perf_event_read:
8627 	case BPF_FUNC_perf_event_output:
8628 	case BPF_FUNC_perf_event_read_value:
8629 	case BPF_FUNC_skb_output:
8630 	case BPF_FUNC_xdp_output:
8631 		if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
8632 			goto error;
8633 		break;
8634 	case BPF_FUNC_ringbuf_output:
8635 	case BPF_FUNC_ringbuf_reserve:
8636 	case BPF_FUNC_ringbuf_query:
8637 	case BPF_FUNC_ringbuf_reserve_dynptr:
8638 	case BPF_FUNC_ringbuf_submit_dynptr:
8639 	case BPF_FUNC_ringbuf_discard_dynptr:
8640 		if (map->map_type != BPF_MAP_TYPE_RINGBUF)
8641 			goto error;
8642 		break;
8643 	case BPF_FUNC_user_ringbuf_drain:
8644 		if (map->map_type != BPF_MAP_TYPE_USER_RINGBUF)
8645 			goto error;
8646 		break;
8647 	case BPF_FUNC_get_stackid:
8648 		if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
8649 			goto error;
8650 		break;
8651 	case BPF_FUNC_current_task_under_cgroup:
8652 	case BPF_FUNC_skb_under_cgroup:
8653 		if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
8654 			goto error;
8655 		break;
8656 	case BPF_FUNC_redirect_map:
8657 		if (map->map_type != BPF_MAP_TYPE_DEVMAP &&
8658 		    map->map_type != BPF_MAP_TYPE_DEVMAP_HASH &&
8659 		    map->map_type != BPF_MAP_TYPE_CPUMAP &&
8660 		    map->map_type != BPF_MAP_TYPE_XSKMAP)
8661 			goto error;
8662 		break;
8663 	case BPF_FUNC_sk_redirect_map:
8664 	case BPF_FUNC_msg_redirect_map:
8665 	case BPF_FUNC_sock_map_update:
8666 		if (map->map_type != BPF_MAP_TYPE_SOCKMAP)
8667 			goto error;
8668 		break;
8669 	case BPF_FUNC_sk_redirect_hash:
8670 	case BPF_FUNC_msg_redirect_hash:
8671 	case BPF_FUNC_sock_hash_update:
8672 		if (map->map_type != BPF_MAP_TYPE_SOCKHASH)
8673 			goto error;
8674 		break;
8675 	case BPF_FUNC_get_local_storage:
8676 		if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
8677 		    map->map_type != BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
8678 			goto error;
8679 		break;
8680 	case BPF_FUNC_sk_select_reuseport:
8681 		if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY &&
8682 		    map->map_type != BPF_MAP_TYPE_SOCKMAP &&
8683 		    map->map_type != BPF_MAP_TYPE_SOCKHASH)
8684 			goto error;
8685 		break;
8686 	case BPF_FUNC_map_pop_elem:
8687 		if (map->map_type != BPF_MAP_TYPE_QUEUE &&
8688 		    map->map_type != BPF_MAP_TYPE_STACK)
8689 			goto error;
8690 		break;
8691 	case BPF_FUNC_map_peek_elem:
8692 	case BPF_FUNC_map_push_elem:
8693 		if (map->map_type != BPF_MAP_TYPE_QUEUE &&
8694 		    map->map_type != BPF_MAP_TYPE_STACK &&
8695 		    map->map_type != BPF_MAP_TYPE_BLOOM_FILTER)
8696 			goto error;
8697 		break;
8698 	case BPF_FUNC_map_lookup_percpu_elem:
8699 		if (map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY &&
8700 		    map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
8701 		    map->map_type != BPF_MAP_TYPE_LRU_PERCPU_HASH)
8702 			goto error;
8703 		break;
8704 	case BPF_FUNC_sk_storage_get:
8705 	case BPF_FUNC_sk_storage_delete:
8706 		if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
8707 			goto error;
8708 		break;
8709 	case BPF_FUNC_inode_storage_get:
8710 	case BPF_FUNC_inode_storage_delete:
8711 		if (map->map_type != BPF_MAP_TYPE_INODE_STORAGE)
8712 			goto error;
8713 		break;
8714 	case BPF_FUNC_task_storage_get:
8715 	case BPF_FUNC_task_storage_delete:
8716 		if (map->map_type != BPF_MAP_TYPE_TASK_STORAGE)
8717 			goto error;
8718 		break;
8719 	case BPF_FUNC_cgrp_storage_get:
8720 	case BPF_FUNC_cgrp_storage_delete:
8721 		if (map->map_type != BPF_MAP_TYPE_CGRP_STORAGE)
8722 			goto error;
8723 		break;
8724 	default:
8725 		break;
8726 	}
8727 
8728 	return 0;
8729 error:
8730 	verbose(env, "cannot pass map_type %d into func %s#%d\n",
8731 		map->map_type, func_id_name(func_id), func_id);
8732 	return -EINVAL;
8733 }
8734 
8735 static bool check_raw_mode_ok(const struct bpf_func_proto *fn)
8736 {
8737 	int count = 0;
8738 
8739 	if (arg_type_is_raw_mem(fn->arg1_type))
8740 		count++;
8741 	if (arg_type_is_raw_mem(fn->arg2_type))
8742 		count++;
8743 	if (arg_type_is_raw_mem(fn->arg3_type))
8744 		count++;
8745 	if (arg_type_is_raw_mem(fn->arg4_type))
8746 		count++;
8747 	if (arg_type_is_raw_mem(fn->arg5_type))
8748 		count++;
8749 
8750 	/* We only support one arg being in raw mode at the moment,
8751 	 * which is sufficient for the helper functions we have
8752 	 * right now.
8753 	 */
8754 	return count <= 1;
8755 }
8756 
8757 static bool check_args_pair_invalid(const struct bpf_func_proto *fn, int arg)
8758 {
8759 	bool is_fixed = fn->arg_type[arg] & MEM_FIXED_SIZE;
8760 	bool has_size = fn->arg_size[arg] != 0;
8761 	bool is_next_size = false;
8762 
8763 	if (arg + 1 < ARRAY_SIZE(fn->arg_type))
8764 		is_next_size = arg_type_is_mem_size(fn->arg_type[arg + 1]);
8765 
8766 	if (base_type(fn->arg_type[arg]) != ARG_PTR_TO_MEM)
8767 		return is_next_size;
8768 
8769 	return has_size == is_next_size || is_next_size == is_fixed;
8770 }
8771 
8772 static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
8773 {
8774 	/* bpf_xxx(..., buf, len) call will access 'len'
8775 	 * bytes from memory 'buf'. Both arg types need
8776 	 * to be paired, so make sure there's no buggy
8777 	 * helper function specification.
8778 	 */
8779 	if (arg_type_is_mem_size(fn->arg1_type) ||
8780 	    check_args_pair_invalid(fn, 0) ||
8781 	    check_args_pair_invalid(fn, 1) ||
8782 	    check_args_pair_invalid(fn, 2) ||
8783 	    check_args_pair_invalid(fn, 3) ||
8784 	    check_args_pair_invalid(fn, 4))
8785 		return false;
8786 
8787 	return true;
8788 }
8789 
8790 static bool check_btf_id_ok(const struct bpf_func_proto *fn)
8791 {
8792 	int i;
8793 
8794 	for (i = 0; i < ARRAY_SIZE(fn->arg_type); i++) {
8795 		if (base_type(fn->arg_type[i]) == ARG_PTR_TO_BTF_ID)
8796 			return !!fn->arg_btf_id[i];
8797 		if (base_type(fn->arg_type[i]) == ARG_PTR_TO_SPIN_LOCK)
8798 			return fn->arg_btf_id[i] == BPF_PTR_POISON;
8799 		if (base_type(fn->arg_type[i]) != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i] &&
8800 		    /* arg_btf_id and arg_size are in a union. */
8801 		    (base_type(fn->arg_type[i]) != ARG_PTR_TO_MEM ||
8802 		     !(fn->arg_type[i] & MEM_FIXED_SIZE)))
8803 			return false;
8804 	}
8805 
8806 	return true;
8807 }
8808 
8809 static bool check_mem_arg_rw_flag_ok(const struct bpf_func_proto *fn)
8810 {
8811 	int i;
8812 
8813 	for (i = 0; i < ARRAY_SIZE(fn->arg_type); i++) {
8814 		enum bpf_arg_type arg_type = fn->arg_type[i];
8815 
8816 		if (base_type(arg_type) != ARG_PTR_TO_MEM)
8817 			continue;
8818 		if (!(arg_type & (MEM_WRITE | MEM_RDONLY)))
8819 			return false;
8820 	}
8821 
8822 	return true;
8823 }
8824 
8825 static bool check_proto_release_reg(const struct bpf_func_proto *fn, struct bpf_call_arg_meta *meta)
8826 {
8827 	int i;
8828 
8829 	for (i = 0; i < ARRAY_SIZE(fn->arg_type); i++) {
8830 		enum bpf_arg_type arg_type = fn->arg_type[i];
8831 
8832 		if (arg_type_is_release(arg_type)) {
8833 			if (meta->release_regno)
8834 				return false;
8835 			meta->release_regno = i + 1;
8836 		}
8837 	}
8838 
8839 	return true;
8840 }
8841 
8842 static int check_func_proto(const struct bpf_func_proto *fn, struct bpf_call_arg_meta *meta)
8843 {
8844 	return check_raw_mode_ok(fn) &&
8845 	       check_arg_pair_ok(fn) &&
8846 	       check_mem_arg_rw_flag_ok(fn) &&
8847 	       check_proto_release_reg(fn, meta) &&
8848 	       check_btf_id_ok(fn) ? 0 : -EINVAL;
8849 }
8850 
8851 /* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
8852  * are now invalid, so turn them into unknown SCALAR_VALUE.
8853  *
8854  * This also applies to dynptr slices belonging to skb and xdp dynptrs,
8855  * since these slices point to packet data.
8856  */
8857 static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
8858 {
8859 	struct bpf_func_state *state;
8860 	struct bpf_reg_state *reg;
8861 
8862 	bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
8863 		if (reg_is_pkt_pointer_any(reg) || reg_is_dynptr_slice_pkt(reg))
8864 			mark_reg_invalid(env, reg);
8865 	}));
8866 }
8867 
/* Negative markers that mark_pkt_end() stores in bpf_reg_state::range. */
enum {
	/* pkt >= pkt_end was established */
	AT_PKT_END	= -1,
	/* pkt > pkt_end was established */
	BEYOND_PKT_END	= -2,
};
8872 
8873 static void mark_pkt_end(struct bpf_verifier_state *vstate, int regn, bool range_open)
8874 {
8875 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
8876 	struct bpf_reg_state *reg = &state->regs[regn];
8877 
8878 	if (reg->type != PTR_TO_PACKET)
8879 		/* PTR_TO_PACKET_META is not supported yet */
8880 		return;
8881 
8882 	/* The 'reg' is pkt > pkt_end or pkt >= pkt_end.
8883 	 * How far beyond pkt_end it goes is unknown.
8884 	 * if (!range_open) it's the case of pkt >= pkt_end
8885 	 * if (range_open) it's the case of pkt > pkt_end
8886 	 * hence this pointer is at least 1 byte bigger than pkt_end
8887 	 */
8888 	if (range_open)
8889 		reg->range = BEYOND_PKT_END;
8890 	else
8891 		reg->range = AT_PKT_END;
8892 }
8893 
8894 static int release_reference_nomark(struct bpf_verifier_state *state, int id)
8895 {
8896 	int i;
8897 
8898 	for (i = 0; i < state->acquired_refs; i++) {
8899 		if (state->refs[i].type != REF_TYPE_PTR)
8900 			continue;
8901 		if (state->refs[i].id == id) {
8902 			release_reference_state(state, i);
8903 			return 0;
8904 		}
8905 	}
8906 	return -EINVAL;
8907 }
8908 
8909 static int idstack_push(struct bpf_idmap *idmap, u32 id)
8910 {
8911 	int i;
8912 
8913 	if (!id)
8914 		return 0;
8915 
8916 	for (i = 0; i < idmap->cnt; i++)
8917 		if (idmap->map[i].old == id)
8918 			return 0;
8919 
8920 	if (WARN_ON_ONCE(idmap->cnt >= BPF_ID_MAP_SIZE))
8921 		return -EFAULT;
8922 
8923 	idmap->map[idmap->cnt++].old = id;
8924 	return 0;
8925 }
8926 
8927 static int idstack_pop(struct bpf_idmap *idmap)
8928 {
8929 	if (!idmap->cnt)
8930 		return 0;
8931 
8932 	return idmap->map[--idmap->cnt].old;
8933 }
8934 
8935 /* Release id and objects derived from it iteratively in a DFS manner */
8936 static int release_reference(struct bpf_verifier_env *env, int id)
8937 {
8938 	u32 mask = (1 << STACK_SPILL) | (1 << STACK_DYNPTR);
8939 	struct bpf_verifier_state *vstate = env->cur_state;
8940 	struct bpf_idmap *idstack = &env->idmap_scratch;
8941 	struct bpf_stack_state *stack;
8942 	struct bpf_func_state *state;
8943 	struct bpf_reg_state *reg;
8944 	int i, err;
8945 
8946 	idstack->cnt = 0;
8947 	err = idstack_push(idstack, id);
8948 	if (err)
8949 		return err;
8950 
8951 	if (find_reference_state(vstate, id))
8952 		WARN_ON_ONCE(release_reference_nomark(vstate, id));
8953 
8954 	while ((id = idstack_pop(idstack))) {
8955 		/*
8956 		 * Child references are inaccessible after parent is released,
8957 		 * any child references that exist at this point are a leak.
8958 		 */
8959 		for (i = 0; i < vstate->acquired_refs; i++) {
8960 			if (vstate->refs[i].type != REF_TYPE_PTR)
8961 				continue;
8962 			if (vstate->refs[i].parent_id != id)
8963 				continue;
8964 			verbose(env, "Leaking reference id=%d alloc_insn=%d. Release it first.\n",
8965 				vstate->refs[i].id, vstate->refs[i].insn_idx);
8966 			return -EINVAL;
8967 		}
8968 
8969 		bpf_for_each_reg_in_vstate_mask(vstate, state, reg, stack, mask, ({
8970 			if (reg->id != id && reg->parent_id != id)
8971 				continue;
8972 
8973 			/* Free objects derived from the current object */
8974 			if (reg->parent_id == id) {
8975 				err = idstack_push(idstack, reg->id);
8976 				if (err)
8977 					return err;
8978 			}
8979 
8980 			if (!stack || stack->slot_type[BPF_REG_SIZE - 1] == STACK_SPILL)
8981 				mark_reg_invalid(env, reg);
8982 			else if (stack->slot_type[BPF_REG_SIZE - 1] == STACK_DYNPTR)
8983 				invalidate_dynptr(env, stack);
8984 		}));
8985 	}
8986 
8987 	return 0;
8988 }
8989 
8990 static void invalidate_non_owning_refs(struct bpf_verifier_env *env)
8991 {
8992 	struct bpf_func_state *unused;
8993 	struct bpf_reg_state *reg;
8994 
8995 	bpf_for_each_reg_in_vstate(env->cur_state, unused, reg, ({
8996 		if (type_is_non_owning_ref(reg->type))
8997 			mark_reg_invalid(env, reg);
8998 	}));
8999 }
9000 
9001 static void invalidate_rcu_protected_refs(struct bpf_verifier_env *env)
9002 {
9003 	struct bpf_stack_state *stack;
9004 	struct bpf_func_state *state;
9005 	struct bpf_reg_state *reg;
9006 	u32 clear_mask = (1 << STACK_SPILL) | (1 << STACK_ITER);
9007 
9008 	bpf_for_each_reg_in_vstate_mask(env->cur_state, state, reg, stack, clear_mask, ({
9009 		if (reg->type & MEM_RCU) {
9010 			reg->type &= ~(MEM_RCU | PTR_MAYBE_NULL);
9011 			reg->type |= PTR_UNTRUSTED;
9012 		}
9013 	}));
9014 }
9015 
9016 static int ref_convert_alloc_rcu_protected(struct bpf_verifier_env *env, u32 id)
9017 {
9018 	struct bpf_func_state *state;
9019 	struct bpf_reg_state *reg;
9020 	int err;
9021 
9022 	err = release_reference_nomark(env->cur_state, id);
9023 
9024 	bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
9025 		if (reg->id != id)
9026 			continue;
9027 		if ((reg->type & MEM_ALLOC) && (reg->type & MEM_PERCPU)) {
9028 			reg->id = 0;
9029 			reg->type &= ~MEM_ALLOC;
9030 			reg->type |= MEM_RCU;
9031 		}
9032 	}));
9033 
9034 	return err;
9035 }
9036 
9037 static void clear_caller_saved_regs(struct bpf_verifier_env *env,
9038 				    struct bpf_reg_state *regs)
9039 {
9040 	int i;
9041 
9042 	/* after the call registers r0 - r5 were scratched */
9043 	for (i = 0; i < CALLER_SAVED_REGS; i++) {
9044 		bpf_mark_reg_not_init(env, &regs[caller_saved[i]]);
9045 		__check_reg_arg(env, regs, caller_saved[i], DST_OP_NO_MARK);
9046 	}
9047 }
9048 
9049 static void invalidate_outgoing_stack_args(const struct bpf_verifier_env *env,
9050 					   struct bpf_func_state *state)
9051 {
9052 	int i, nslots = state->out_stack_arg_cnt;
9053 
9054 	for (i = 0; i < nslots; i++)
9055 		bpf_mark_reg_not_init(env, &state->stack_arg_regs[i]);
9056 }
9057 
/* Callback that fills in a freshly initialised callee frame from the caller
 * frame for the call at the given instruction index. A non-zero return is
 * treated as an error by the callers in this file.
 */
typedef int (*set_callee_state_fn)(struct bpf_verifier_env *env,
				   struct bpf_func_state *caller_frame,
				   struct bpf_func_state *callee_frame,
				   int call_insn_idx);
9062 
/* Forward declaration; matches the set_callee_state_fn signature. */
static int set_callee_state(struct bpf_verifier_env *env,
			    struct bpf_func_state *caller,
			    struct bpf_func_state *callee,
			    int insn_idx);
9066 
9067 static int setup_func_entry(struct bpf_verifier_env *env, int subprog, int callsite,
9068 			    set_callee_state_fn set_callee_state_cb,
9069 			    struct bpf_verifier_state *state)
9070 {
9071 	struct bpf_func_state *caller, *callee;
9072 	int err;
9073 
9074 	if (state->curframe + 1 >= MAX_CALL_FRAMES) {
9075 		verbose(env, "the call stack of %d frames is too deep\n",
9076 			state->curframe + 2);
9077 		return -E2BIG;
9078 	}
9079 
9080 	if (state->frame[state->curframe + 1]) {
9081 		verifier_bug(env, "Frame %d already allocated", state->curframe + 1);
9082 		return -EFAULT;
9083 	}
9084 
9085 	caller = state->frame[state->curframe];
9086 	callee = kzalloc_obj(*callee, GFP_KERNEL_ACCOUNT);
9087 	if (!callee)
9088 		return -ENOMEM;
9089 	state->frame[state->curframe + 1] = callee;
9090 
9091 	/* callee cannot access r0, r6 - r9 for reading and has to write
9092 	 * into its own stack before reading from it.
9093 	 * callee can read/write into caller's stack
9094 	 */
9095 	init_func_state(env, callee,
9096 			/* remember the callsite, it will be used by bpf_exit */
9097 			callsite,
9098 			state->curframe + 1 /* frameno within this callchain */,
9099 			subprog /* subprog number within this prog */);
9100 	err = set_callee_state_cb(env, caller, callee, callsite);
9101 	if (err)
9102 		goto err_out;
9103 
9104 	/* only increment it after check_reg_arg() finished */
9105 	state->curframe++;
9106 
9107 	return 0;
9108 
9109 err_out:
9110 	free_func_state(callee);
9111 	state->frame[state->curframe + 1] = NULL;
9112 	return err;
9113 }
9114 
9115 static int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog,
9116 				    const struct btf *btf,
9117 				    struct bpf_reg_state *regs)
9118 {
9119 	struct bpf_subprog_info *sub = subprog_info(env, subprog);
9120 	struct bpf_func_state *caller = cur_func(env);
9121 	struct bpf_verifier_log *log = &env->log;
9122 	struct ref_obj_desc ref_obj = {};
9123 	u32 i;
9124 	int ret, err;
9125 
9126 	ret = btf_prepare_func_args(env, subprog);
9127 	if (ret) {
9128 		if (bpf_in_stack_arg_cnt(sub) > 0) {
9129 			err = check_outgoing_stack_args(env, caller, sub->arg_cnt);
9130 			if (err)
9131 				return err;
9132 		}
9133 		return ret;
9134 	}
9135 
9136 	ret = check_outgoing_stack_args(env, caller, sub->arg_cnt);
9137 	if (ret)
9138 		return ret;
9139 
9140 	/* check that BTF function arguments match actual types that the
9141 	 * verifier sees.
9142 	 */
9143 	for (i = 0; i < sub->arg_cnt; i++) {
9144 		argno_t argno = argno_from_arg(i + 1);
9145 		struct bpf_reg_state *reg = get_func_arg_reg(caller, regs, i);
9146 		struct bpf_subprog_arg_info *arg = &sub->args[i];
9147 
9148 		if (arg->arg_type == ARG_ANYTHING) {
9149 			if (reg->type != SCALAR_VALUE) {
9150 				bpf_log(log, "%s is not a scalar\n", reg_arg_name(env, argno));
9151 				return -EINVAL;
9152 			}
9153 		} else if (arg->arg_type & PTR_UNTRUSTED) {
9154 			/*
9155 			 * Anything is allowed for untrusted arguments, as these are
9156 			 * read-only and probe read instructions would protect against
9157 			 * invalid memory access.
9158 			 */
9159 		} else if (arg->arg_type == ARG_PTR_TO_CTX) {
9160 			ret = check_func_arg_reg_off(env, reg, argno, ARG_PTR_TO_CTX);
9161 			if (ret < 0)
9162 				return ret;
9163 			/* If function expects ctx type in BTF check that caller
9164 			 * is passing PTR_TO_CTX.
9165 			 */
9166 			if (reg->type != PTR_TO_CTX) {
9167 				bpf_log(log, "%s expects pointer to ctx\n",
9168 					reg_arg_name(env, argno));
9169 				return -EINVAL;
9170 			}
9171 		} else if (base_type(arg->arg_type) == ARG_PTR_TO_MEM) {
9172 			ret = check_func_arg_reg_off(env, reg, argno, ARG_DONTCARE);
9173 			if (ret < 0)
9174 				return ret;
9175 			if (check_mem_reg(env, reg, argno, arg->mem_size))
9176 				return -EINVAL;
9177 			if (!(arg->arg_type & PTR_MAYBE_NULL) && (reg->type & PTR_MAYBE_NULL)) {
9178 				bpf_log(log, "%s is expected to be non-NULL\n",
9179 					reg_arg_name(env, argno));
9180 				return -EINVAL;
9181 			}
9182 		} else if (base_type(arg->arg_type) == ARG_PTR_TO_ARENA) {
9183 			/*
9184 			 * Can pass any value and the kernel won't crash, but
9185 			 * only PTR_TO_ARENA or SCALAR make sense. Everything
9186 			 * else is a bug in the bpf program. Point it out to
9187 			 * the user at the verification time instead of
9188 			 * run-time debug nightmare.
9189 			 */
9190 			if (reg->type != PTR_TO_ARENA && reg->type != SCALAR_VALUE) {
9191 				bpf_log(log, "%s is not a pointer to arena or scalar.\n",
9192 					reg_arg_name(env, argno));
9193 				return -EINVAL;
9194 			}
9195 		} else if (arg->arg_type == ARG_PTR_TO_DYNPTR) {
9196 			ret = check_func_arg_reg_off(env, reg, argno, ARG_PTR_TO_DYNPTR);
9197 			if (ret)
9198 				return ret;
9199 
9200 			ret = process_dynptr_func(env, reg, argno, -1, arg->arg_type, &ref_obj, NULL);
9201 			if (ret)
9202 				return ret;
9203 		} else if (base_type(arg->arg_type) == ARG_PTR_TO_BTF_ID) {
9204 			struct bpf_call_arg_meta meta;
9205 			int err;
9206 
9207 			if (bpf_register_is_null(reg) && type_may_be_null(arg->arg_type))
9208 				continue;
9209 
9210 			memset(&meta, 0, sizeof(meta)); /* leave func_id as zero */
9211 			err = check_reg_type(env, reg, argno, arg->arg_type, &arg->btf_id, &meta);
9212 			err = err ?: check_func_arg_reg_off(env, reg, argno, arg->arg_type);
9213 			if (err)
9214 				return err;
9215 		} else {
9216 			verifier_bug(env, "unrecognized %s type %d",
9217 				     reg_arg_name(env, argno), arg->arg_type);
9218 			return -EFAULT;
9219 		}
9220 	}
9221 
9222 	return 0;
9223 }
9224 
9225 /* Compare BTF of a function call with given bpf_reg_state.
9226  * Returns:
9227  * EFAULT - there is a verifier bug. Abort verification.
9228  * EINVAL - there is a type mismatch or BTF is not available.
9229  * 0 - BTF matches with what bpf_reg_state expects.
9230  * Only PTR_TO_CTX and SCALAR_VALUE states are recognized.
9231  */
9232 static int btf_check_subprog_call(struct bpf_verifier_env *env, int subprog,
9233 				  struct bpf_reg_state *regs)
9234 {
9235 	struct bpf_prog *prog = env->prog;
9236 	struct btf *btf = prog->aux->btf;
9237 	u32 btf_id;
9238 	int err;
9239 
9240 	if (!prog->aux->func_info)
9241 		return -EINVAL;
9242 
9243 	btf_id = prog->aux->func_info[subprog].type_id;
9244 	if (!btf_id)
9245 		return -EFAULT;
9246 
9247 	if (prog->aux->func_info_aux[subprog].unreliable)
9248 		return -EINVAL;
9249 
9250 	err = btf_check_func_arg_match(env, subprog, btf, regs);
9251 	/* Compiler optimizations can remove arguments from static functions
9252 	 * or mismatched type can be passed into a global function.
9253 	 * In such cases mark the function as unreliable from BTF point of view.
9254 	 */
9255 	if (err)
9256 		prog->aux->func_info_aux[subprog].unreliable = true;
9257 	return err;
9258 }
9259 
/* Schedule verification of a callback subprog that is invoked by a helper
 * or kfunc call.
 *
 * @insn:                the calling instruction, located at @insn_idx
 * @subprog:             index of the callback subprogram
 * @set_callee_state_cb: fills in the callback's entry register state
 *
 * For async callbacks a separate state is pushed with push_async_cb().
 * For all other callbacks a state entering the callback is pushed with
 * push_stack(), while the current state keeps going in the current frame.
 *
 * Returns 0 on success or a negative errno.
 */
static int push_callback_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
			      int insn_idx, int subprog,
			      set_callee_state_fn set_callee_state_cb)
{
	struct bpf_verifier_state *state = env->cur_state, *callback_state;
	struct bpf_func_state *caller, *callee;
	int err;

	caller = state->frame[state->curframe];
	err = btf_check_subprog_call(env, subprog, caller->regs);
	/* only -EFAULT aborts here; any other result is not acted upon */
	if (err == -EFAULT)
		return err;

	/* set_callee_state is used for direct subprog calls, but we are
	 * interested in validating only BPF helpers that can call subprogs as
	 * callbacks
	 */
	env->subprog_info[subprog].is_cb = true;
	/* the calling kfunc/helper must be known to call callbacks, anything
	 * else is reported as a verifier bug
	 */
	if (bpf_pseudo_kfunc_call(insn) &&
	    !is_callback_calling_kfunc(insn->imm)) {
		verifier_bug(env, "kfunc %s#%d not marked as callback-calling",
			     func_id_name(insn->imm), insn->imm);
		return -EFAULT;
	} else if (!bpf_pseudo_kfunc_call(insn) &&
		   !is_callback_calling_function(insn->imm)) { /* helper */
		verifier_bug(env, "helper %s#%d not marked as callback-calling",
			     func_id_name(insn->imm), insn->imm);
		return -EFAULT;
	}

	if (bpf_is_async_callback_calling_insn(insn)) {
		struct bpf_verifier_state *async_cb;

		/* there is no real recursion here. timer and workqueue callbacks are async */
		env->subprog_info[subprog].is_async_cb = true;
		async_cb = push_async_cb(env, env->subprog_info[subprog].start,
					 insn_idx, subprog,
					 is_async_cb_sleepable(env, insn));
		if (IS_ERR(async_cb))
			return PTR_ERR(async_cb);
		/* the callback runs in frame 0 of the pushed async state */
		callee = async_cb->frame[0];
		callee->async_entry_cnt = caller->async_entry_cnt + 1;

		/* Convert bpf_timer_set_callback() args into timer callback args */
		err = set_callee_state_cb(env, caller, callee, insn_idx);
		if (err)
			return err;

		return 0;
	}

	/* for callback functions enqueue entry to callback and
	 * proceed with next instruction within current frame.
	 */
	callback_state = push_stack(env, env->subprog_info[subprog].start, insn_idx, false);
	if (IS_ERR(callback_state))
		return PTR_ERR(callback_state);

	err = setup_func_entry(env, subprog, insn_idx, set_callee_state_cb,
			       callback_state);
	if (err)
		return err;

	/* Bump the depth counters on the pushed state only; the frame at
	 * curframe - 1 is presumably the frame that issued the call (assumes
	 * setup_func_entry() advanced curframe -- TODO confirm).
	 */
	callback_state->callback_unroll_depth++;
	callback_state->frame[callback_state->curframe - 1]->callback_depth++;
	/* the state that continues in the current frame starts over at zero */
	caller->callback_depth = 0;
	return 0;
}
9328 
9329 static int process_bpf_exit_full(struct bpf_verifier_env *env,
9330 				 bool *do_print_state, bool exception_exit);
9331 
/* Verify a call instruction that targets another BPF subprogram.
 *
 * @insn is the call instruction and *@insn_idx is its index; the target
 * subprog is the one starting at *insn_idx + insn->imm + 1.
 *
 * Global subprog: the callee body is not entered here. The caller's state
 * is adjusted (caller-saved registers cleared, R0 marked unknown unless the
 * subprog returns void) and verification continues after the call. If the
 * subprog might throw, the continuation is pushed and the current state is
 * processed as an exception exit.
 *
 * Any other subprog: a new frame is set up with setup_func_entry() and
 * *insn_idx is rewritten so the callee's first instruction is analyzed next.
 *
 * Returns 0 on success or a negative errno.
 */
static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
			   int *insn_idx)
{
	struct bpf_verifier_state *state = env->cur_state;
	struct bpf_subprog_info *caller_info;
	u16 callee_incoming, stack_arg_cnt;
	struct bpf_func_state *caller;
	int err, subprog, target_insn;

	target_insn = *insn_idx + insn->imm + 1;
	subprog = bpf_find_subprog(env, target_insn);
	if (verifier_bug_if(subprog < 0, env, "target of func call at insn %d is not a program",
			    target_insn))
		return -EFAULT;

	caller = state->frame[state->curframe];
	err = btf_check_subprog_call(env, subprog, caller->regs);
	/* -EFAULT aborts right away; other errors only matter for global
	 * subprogs and are reported in the branch below
	 */
	if (err == -EFAULT)
		return err;
	if (bpf_subprog_is_global(env, subprog)) {
		const char *sub_name = subprog_name(env, subprog);

		if (env->cur_state->active_locks) {
			verbose(env, "global function calls are not allowed while holding a lock,\n"
				     "use static function instead\n");
			return -EINVAL;
		}

		if (env->subprog_info[subprog].might_sleep && !in_sleepable_context(env)) {
			verbose(env, "sleepable global function %s() called in %s\n",
				sub_name, non_sleepable_context_description(env));
			return -EINVAL;
		}

		/* argument mismatch found by btf_check_subprog_call() above */
		if (err) {
			verbose(env, "Caller passes invalid args into func#%d ('%s')\n",
				subprog, sub_name);
			return err;
		}

		if (env->log.level & BPF_LOG_LEVEL)
			verbose(env, "Func#%d ('%s') is global and assumed valid.\n",
				subprog, sub_name);
		if (env->subprog_info[subprog].changes_pkt_data)
			clear_all_pkt_pointers(env);
		/* mark global subprog for verifying after main prog */
		subprog_aux(env, subprog)->called = true;
		clear_caller_saved_regs(env, caller->regs);
		invalidate_outgoing_stack_args(env, cur_func(env));

		/* All non-void global functions return a 64-bit SCALAR_VALUE. */
		if (!subprog_returns_void(env, subprog)) {
			mark_reg_unknown(env, caller->regs, BPF_REG_0);
			caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
		}

		if (env->subprog_info[subprog].might_throw) {
			struct bpf_verifier_state *branch;

			/* queue the state that continues after the call ... */
			branch = push_stack(env, *insn_idx + 1, *insn_idx, false);
			if (IS_ERR(branch)) {
				verbose(env, "failed to push state for global subprog exception path\n");
				return PTR_ERR(branch);
			}
			/* ... and treat the current state as an exception exit */
			return process_bpf_exit_full(env, NULL, true);
		}

		/* continue with next insn after call */
		return 0;
	}

	/*
	 * Track caller's total stack arg count (incoming + max outgoing).
	 * This is needed so the JIT knows how much stack arg space to allocate.
	 */
	caller_info = &env->subprog_info[caller->subprogno];
	callee_incoming = bpf_in_stack_arg_cnt(&env->subprog_info[subprog]);
	stack_arg_cnt = bpf_in_stack_arg_cnt(caller_info) + callee_incoming;
	/* keep the maximum seen over all calls made by this caller */
	if (stack_arg_cnt > caller_info->stack_arg_cnt)
		caller_info->stack_arg_cnt = stack_arg_cnt;

	/* for regular function entry setup new frame and continue
	 * from that frame.
	 */
	err = setup_func_entry(env, subprog, *insn_idx, set_callee_state, state);
	if (err)
		return err;

	clear_caller_saved_regs(env, caller->regs);

	/* and go analyze first insn of the callee */
	*insn_idx = env->subprog_info[subprog].start - 1;

	if (env->log.level & BPF_LOG_LEVEL) {
		verbose(env, "caller:\n");
		print_verifier_state(env, state, caller->frameno, true);
		verbose(env, "callee:\n");
		print_verifier_state(env, state, state->curframe, true);
	}

	return 0;
}
9434 
9435 int map_set_for_each_callback_args(struct bpf_verifier_env *env,
9436 				   struct bpf_func_state *caller,
9437 				   struct bpf_func_state *callee)
9438 {
9439 	/* bpf_for_each_map_elem(struct bpf_map *map, void *callback_fn,
9440 	 *      void *callback_ctx, u64 flags);
9441 	 * callback_fn(struct bpf_map *map, void *key, void *value,
9442 	 *      void *callback_ctx);
9443 	 */
9444 	callee->regs[BPF_REG_1] = caller->regs[BPF_REG_1];
9445 
9446 	callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
9447 	__mark_reg_known_zero(&callee->regs[BPF_REG_2]);
9448 	callee->regs[BPF_REG_2].map_ptr = caller->regs[BPF_REG_1].map_ptr;
9449 
9450 	callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
9451 	__mark_reg_known_zero(&callee->regs[BPF_REG_3]);
9452 	callee->regs[BPF_REG_3].map_ptr = caller->regs[BPF_REG_1].map_ptr;
9453 
9454 	/* pointer to stack or null */
9455 	callee->regs[BPF_REG_4] = caller->regs[BPF_REG_3];
9456 
9457 	/* unused */
9458 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
9459 	return 0;
9460 }
9461 
9462 static int set_callee_state(struct bpf_verifier_env *env,
9463 			    struct bpf_func_state *caller,
9464 			    struct bpf_func_state *callee, int insn_idx)
9465 {
9466 	int i;
9467 
9468 	/* copy r1 - r5 args that callee can access.  The copy includes parent
9469 	 * pointers, which connects us up to the liveness chain
9470 	 */
9471 	for (i = BPF_REG_1; i <= BPF_REG_5; i++)
9472 		callee->regs[i] = caller->regs[i];
9473 	return 0;
9474 }
9475 
9476 static int set_map_elem_callback_state(struct bpf_verifier_env *env,
9477 				       struct bpf_func_state *caller,
9478 				       struct bpf_func_state *callee,
9479 				       int insn_idx)
9480 {
9481 	struct bpf_insn_aux_data *insn_aux = &env->insn_aux_data[insn_idx];
9482 	struct bpf_map *map;
9483 	int err;
9484 
9485 	/* valid map_ptr and poison value does not matter */
9486 	map = insn_aux->map_ptr_state.map_ptr;
9487 	if (!map->ops->map_set_for_each_callback_args ||
9488 	    !map->ops->map_for_each_callback) {
9489 		verbose(env, "callback function not allowed for map\n");
9490 		return -ENOTSUPP;
9491 	}
9492 
9493 	err = map->ops->map_set_for_each_callback_args(env, caller, callee);
9494 	if (err)
9495 		return err;
9496 
9497 	callee->in_callback_fn = true;
9498 	callee->callback_ret_range = retval_range(0, 1);
9499 	return 0;
9500 }
9501 
9502 static int set_loop_callback_state(struct bpf_verifier_env *env,
9503 				   struct bpf_func_state *caller,
9504 				   struct bpf_func_state *callee,
9505 				   int insn_idx)
9506 {
9507 	/* bpf_loop(u32 nr_loops, void *callback_fn, void *callback_ctx,
9508 	 *	    u64 flags);
9509 	 * callback_fn(u64 index, void *callback_ctx);
9510 	 */
9511 	callee->regs[BPF_REG_1].type = SCALAR_VALUE;
9512 	callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3];
9513 
9514 	/* unused */
9515 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
9516 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
9517 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
9518 
9519 	callee->in_callback_fn = true;
9520 	callee->callback_ret_range = retval_range(0, 1);
9521 	return 0;
9522 }
9523 
9524 static int set_timer_callback_state(struct bpf_verifier_env *env,
9525 				    struct bpf_func_state *caller,
9526 				    struct bpf_func_state *callee,
9527 				    int insn_idx)
9528 {
9529 	struct bpf_map *map_ptr = caller->regs[BPF_REG_1].map_ptr;
9530 
9531 	/* bpf_timer_set_callback(struct bpf_timer *timer, void *callback_fn);
9532 	 * callback_fn(struct bpf_map *map, void *key, void *value);
9533 	 */
9534 	callee->regs[BPF_REG_1].type = CONST_PTR_TO_MAP;
9535 	__mark_reg_known_zero(&callee->regs[BPF_REG_1]);
9536 	callee->regs[BPF_REG_1].map_ptr = map_ptr;
9537 
9538 	callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
9539 	__mark_reg_known_zero(&callee->regs[BPF_REG_2]);
9540 	callee->regs[BPF_REG_2].map_ptr = map_ptr;
9541 
9542 	callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
9543 	__mark_reg_known_zero(&callee->regs[BPF_REG_3]);
9544 	callee->regs[BPF_REG_3].map_ptr = map_ptr;
9545 
9546 	/* unused */
9547 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
9548 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
9549 	callee->in_async_callback_fn = true;
9550 	callee->callback_ret_range = retval_range(0, 0);
9551 	return 0;
9552 }
9553 
9554 static int set_find_vma_callback_state(struct bpf_verifier_env *env,
9555 				       struct bpf_func_state *caller,
9556 				       struct bpf_func_state *callee,
9557 				       int insn_idx)
9558 {
9559 	/* bpf_find_vma(struct task_struct *task, u64 addr,
9560 	 *               void *callback_fn, void *callback_ctx, u64 flags)
9561 	 * (callback_fn)(struct task_struct *task,
9562 	 *               struct vm_area_struct *vma, void *callback_ctx);
9563 	 */
9564 	callee->regs[BPF_REG_1] = caller->regs[BPF_REG_1];
9565 
9566 	callee->regs[BPF_REG_2].type = PTR_TO_BTF_ID;
9567 	__mark_reg_known_zero(&callee->regs[BPF_REG_2]);
9568 	callee->regs[BPF_REG_2].btf =  btf_vmlinux;
9569 	callee->regs[BPF_REG_2].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_VMA];
9570 
9571 	/* pointer to stack or null */
9572 	callee->regs[BPF_REG_3] = caller->regs[BPF_REG_4];
9573 
9574 	/* unused */
9575 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
9576 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
9577 	callee->in_callback_fn = true;
9578 	callee->callback_ret_range = retval_range(0, 1);
9579 	return 0;
9580 }
9581 
9582 static int set_user_ringbuf_callback_state(struct bpf_verifier_env *env,
9583 					   struct bpf_func_state *caller,
9584 					   struct bpf_func_state *callee,
9585 					   int insn_idx)
9586 {
9587 	/* bpf_user_ringbuf_drain(struct bpf_map *map, void *callback_fn, void
9588 	 *			  callback_ctx, u64 flags);
9589 	 * callback_fn(const struct bpf_dynptr_t* dynptr, void *callback_ctx);
9590 	 */
9591 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_0]);
9592 	mark_dynptr_cb_reg(env, &callee->regs[BPF_REG_1], BPF_DYNPTR_TYPE_LOCAL);
9593 	callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3];
9594 
9595 	/* unused */
9596 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
9597 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
9598 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
9599 
9600 	callee->in_callback_fn = true;
9601 	callee->callback_ret_range = retval_range(0, 1);
9602 	return 0;
9603 }
9604 
9605 static int set_rbtree_add_callback_state(struct bpf_verifier_env *env,
9606 					 struct bpf_func_state *caller,
9607 					 struct bpf_func_state *callee,
9608 					 int insn_idx)
9609 {
9610 	/* void bpf_rbtree_add_impl(struct bpf_rb_root *root, struct bpf_rb_node *node,
9611 	 *                     bool (less)(struct bpf_rb_node *a, const struct bpf_rb_node *b));
9612 	 *
9613 	 * 'struct bpf_rb_node *node' arg to bpf_rbtree_add_impl is the same PTR_TO_BTF_ID w/ offset
9614 	 * that 'less' callback args will be receiving. However, 'node' arg was release_reference'd
9615 	 * by this point, so look at 'root'
9616 	 */
9617 	struct btf_field *field;
9618 
9619 	field = reg_find_field_offset(&caller->regs[BPF_REG_1],
9620 				      caller->regs[BPF_REG_1].var_off.value,
9621 				      BPF_RB_ROOT);
9622 	if (!field || !field->graph_root.value_btf_id)
9623 		return -EFAULT;
9624 
9625 	mark_reg_graph_node(callee->regs, BPF_REG_1, &field->graph_root);
9626 	ref_set_non_owning(env, &callee->regs[BPF_REG_1]);
9627 	mark_reg_graph_node(callee->regs, BPF_REG_2, &field->graph_root);
9628 	ref_set_non_owning(env, &callee->regs[BPF_REG_2]);
9629 
9630 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
9631 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
9632 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
9633 	callee->in_callback_fn = true;
9634 	callee->callback_ret_range = retval_range(0, 1);
9635 	return 0;
9636 }
9637 
9638 static int set_task_work_schedule_callback_state(struct bpf_verifier_env *env,
9639 						 struct bpf_func_state *caller,
9640 						 struct bpf_func_state *callee,
9641 						 int insn_idx)
9642 {
9643 	struct bpf_map *map_ptr = caller->regs[BPF_REG_3].map_ptr;
9644 
9645 	/*
9646 	 * callback_fn(struct bpf_map *map, void *key, void *value);
9647 	 */
9648 	callee->regs[BPF_REG_1].type = CONST_PTR_TO_MAP;
9649 	__mark_reg_known_zero(&callee->regs[BPF_REG_1]);
9650 	callee->regs[BPF_REG_1].map_ptr = map_ptr;
9651 
9652 	callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
9653 	__mark_reg_known_zero(&callee->regs[BPF_REG_2]);
9654 	callee->regs[BPF_REG_2].map_ptr = map_ptr;
9655 
9656 	callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
9657 	__mark_reg_known_zero(&callee->regs[BPF_REG_3]);
9658 	callee->regs[BPF_REG_3].map_ptr = map_ptr;
9659 
9660 	/* unused */
9661 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
9662 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
9663 	callee->in_async_callback_fn = true;
9664 	callee->callback_ret_range = retval_range(S32_MIN, S32_MAX);
9665 	return 0;
9666 }
9667 
9668 static bool is_rbtree_lock_required_kfunc(u32 btf_id);
9669 
9670 /* Are we currently verifying the callback for a rbtree helper that must
9671  * be called with lock held? If so, no need to complain about unreleased
9672  * lock
9673  */
9674 static bool in_rbtree_lock_required_cb(struct bpf_verifier_env *env)
9675 {
9676 	struct bpf_verifier_state *state = env->cur_state;
9677 	struct bpf_insn *insn = env->prog->insnsi;
9678 	struct bpf_func_state *callee;
9679 	int kfunc_btf_id;
9680 
9681 	if (!state->curframe)
9682 		return false;
9683 
9684 	callee = state->frame[state->curframe];
9685 
9686 	if (!callee->in_callback_fn)
9687 		return false;
9688 
9689 	kfunc_btf_id = insn[callee->callsite].imm;
9690 	return is_rbtree_lock_required_kfunc(kfunc_btf_id);
9691 }
9692 
9693 static bool retval_range_within(struct bpf_retval_range range, const struct bpf_reg_state *reg)
9694 {
9695 	if (range.return_32bit)
9696 		return range.minval <= reg_s32_min(reg) && reg_s32_max(reg) <= range.maxval;
9697 	else
9698 		return range.minval <= reg_smin(reg) && reg_smax(reg) <= range.maxval;
9699 }
9700 
/* Handle an exit from a callee frame: validate R0, choose the instruction
 * at which the caller resumes, and pop the callee frame.
 *
 * On success *@insn_idx is set to the callsite itself when leaving a
 * callback (callee->in_callback_fn) and to callsite + 1 otherwise.
 *
 * Returns 0 on success or a negative errno.
 */
static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
{
	struct bpf_verifier_state *state = env->cur_state, *prev_st;
	struct bpf_func_state *caller, *callee;
	struct bpf_reg_state *r0;
	bool in_callback_fn;
	int err;

	callee = state->frame[state->curframe];
	r0 = &callee->regs[BPF_REG_0];
	if (r0->type == PTR_TO_STACK) {
		/* technically it's ok to return caller's stack pointer
		 * (or caller's caller's pointer) back to the caller,
		 * since these pointers are valid. Only current stack
		 * pointer will be invalid as soon as function exits,
		 * but let's be conservative
		 */
		verbose(env, "cannot return stack pointer to the caller\n");
		return -EINVAL;
	}

	caller = state->frame[state->curframe - 1];
	if (callee->in_callback_fn) {
		/* a callback's R0 is only validated here, it is not copied
		 * into the caller's R0
		 */
		if (r0->type != SCALAR_VALUE) {
			verbose(env, "R0 not a scalar value\n");
			return -EACCES;
		}

		/* we are going to rely on register's precise value */
		err = mark_chain_precision(env, BPF_REG_0);
		if (err)
			return err;

		/* enforce R0 return value range, and bpf_callback_t returns 64bit */
		if (!retval_range_within(callee->callback_ret_range, r0)) {
			verbose_invalid_scalar(env, r0, callee->callback_ret_range,
					       "At callback return", "R0");
			return -EINVAL;
		}
		/* the callsite must be an instruction known to call callbacks */
		if (!bpf_calls_callback(env, callee->callsite)) {
			verifier_bug(env, "in callback at %d, callsite %d !calls_callback",
				     *insn_idx, callee->callsite);
			return -EFAULT;
		}
	} else {
		/* return to the caller whatever r0 had in the callee */
		caller->regs[BPF_REG_0] = *r0;
	}

	/* for callbacks like bpf_loop or bpf_for_each_map_elem go back to callsite,
	 * there function call logic would reschedule callback visit. If iteration
	 * converges is_state_visited() would prune that visit eventually.
	 */
	/* remember the flag now: callee is freed further down */
	in_callback_fn = callee->in_callback_fn;
	if (in_callback_fn)
		*insn_idx = callee->callsite;
	else
		*insn_idx = callee->callsite + 1;

	if (env->log.level & BPF_LOG_LEVEL) {
		verbose(env, "returning from callee:\n");
		print_verifier_state(env, state, callee->frameno, true);
		verbose(env, "to caller at %d:\n", *insn_idx);
		print_verifier_state(env, state, caller->frameno, true);
	}
	/* clear everything in the callee. In case of exceptional exits using
	 * bpf_throw, this will be done by copy_verifier_state for extra frames. */
	free_func_state(callee);
	/* clear the callee's slot and make the caller the current frame */
	state->frame[state->curframe--] = NULL;
	invalidate_outgoing_stack_args(env, caller);

	/* for callbacks widen imprecise scalars to make programs like below verify:
	 *
	 *   struct ctx { int i; }
	 *   void cb(int idx, struct ctx *ctx) { ctx->i++; ... }
	 *   ...
	 *   struct ctx = { .i = 0; }
	 *   bpf_loop(100, cb, &ctx, 0);
	 *
	 * This is similar to what is done in process_iter_next_call() for open
	 * coded iterators.
	 */
	prev_st = in_callback_fn ? find_prev_entry(env, state, *insn_idx) : NULL;
	if (prev_st) {
		err = widen_imprecise_scalars(env, prev_st, state);
		if (err)
			return err;
	}
	return 0;
}
9791 
9792 static int do_refine_retval_range(struct bpf_verifier_env *env,
9793 				  struct bpf_reg_state *regs, int ret_type,
9794 				  int func_id,
9795 				  struct bpf_call_arg_meta *meta)
9796 {
9797 	struct bpf_retval_range range;
9798 	struct bpf_reg_state *ret_reg = &regs[BPF_REG_0];
9799 	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
9800 
9801 	if (ret_type != RET_INTEGER)
9802 		return 0;
9803 
9804 	switch (func_id) {
9805 	case BPF_FUNC_get_stack:
9806 	case BPF_FUNC_get_task_stack:
9807 	case BPF_FUNC_probe_read_str:
9808 	case BPF_FUNC_probe_read_kernel_str:
9809 	case BPF_FUNC_probe_read_user_str:
9810 		reg_set_srange64(ret_reg, -MAX_ERRNO, meta->msize_max_value);
9811 		reg_set_srange32(ret_reg, -MAX_ERRNO, meta->msize_max_value);
9812 		reg_bounds_sync(ret_reg);
9813 		break;
9814 	case BPF_FUNC_get_smp_processor_id:
9815 		reg_set_urange64(ret_reg, 0, nr_cpu_ids - 1);
9816 		reg_set_urange32(ret_reg, 0, nr_cpu_ids - 1);
9817 		reg_bounds_sync(ret_reg);
9818 		break;
9819 	case BPF_FUNC_get_retval:
9820 		/*
9821 		 * bpf_get_retval may see arbitrary value passed by bpf_prog_run_array_cg for
9822 		 * CGROUP_GETSOCKOPT type.
9823 		 */
9824 		if (prog_type == BPF_PROG_TYPE_CGROUP_SOCKOPT &&
9825 		    env->prog->expected_attach_type == BPF_CGROUP_GETSOCKOPT)
9826 			break;
9827 
9828 		if (prog_type == BPF_PROG_TYPE_LSM &&
9829 		    env->prog->expected_attach_type == BPF_LSM_CGROUP) {
9830 			if (!env->prog->aux->attach_func_proto->type)
9831 				break;
9832 			bpf_lsm_get_retval_range(env->prog, &range);
9833 		} else {
9834 			range.minval = -MAX_ERRNO;
9835 			range.maxval = 0;
9836 		}
9837 
9838 		reg_set_srange64(ret_reg, range.minval, range.maxval);
9839 		reg_set_srange32(ret_reg, range.minval, range.maxval);
9840 		reg_bounds_sync(ret_reg);
9841 		break;
9842 	}
9843 
9844 	return reg_bounds_sanity_check(env, ret_reg, "retval");
9845 }
9846 
9847 static int
9848 record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
9849 		int func_id, int insn_idx)
9850 {
9851 	struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
9852 	struct bpf_map *map = meta->map.ptr;
9853 
9854 	if (func_id != BPF_FUNC_tail_call &&
9855 	    func_id != BPF_FUNC_map_lookup_elem &&
9856 	    func_id != BPF_FUNC_map_update_elem &&
9857 	    func_id != BPF_FUNC_map_delete_elem &&
9858 	    func_id != BPF_FUNC_map_push_elem &&
9859 	    func_id != BPF_FUNC_map_pop_elem &&
9860 	    func_id != BPF_FUNC_map_peek_elem &&
9861 	    func_id != BPF_FUNC_for_each_map_elem &&
9862 	    func_id != BPF_FUNC_redirect_map &&
9863 	    func_id != BPF_FUNC_map_lookup_percpu_elem)
9864 		return 0;
9865 
9866 	if (map == NULL) {
9867 		verifier_bug(env, "expected map for helper call");
9868 		return -EFAULT;
9869 	}
9870 
9871 	/* In case of read-only, some additional restrictions
9872 	 * need to be applied in order to prevent altering the
9873 	 * state of the map from program side.
9874 	 */
9875 	if ((map->map_flags & BPF_F_RDONLY_PROG) &&
9876 	    (func_id == BPF_FUNC_map_delete_elem ||
9877 	     func_id == BPF_FUNC_map_update_elem ||
9878 	     func_id == BPF_FUNC_map_push_elem ||
9879 	     func_id == BPF_FUNC_map_pop_elem)) {
9880 		verbose(env, "write into map forbidden\n");
9881 		return -EACCES;
9882 	}
9883 
9884 	if (!aux->map_ptr_state.map_ptr)
9885 		bpf_map_ptr_store(aux, meta->map.ptr,
9886 				  !meta->map.ptr->bypass_spec_v1, false);
9887 	else if (aux->map_ptr_state.map_ptr != meta->map.ptr)
9888 		bpf_map_ptr_store(aux, meta->map.ptr,
9889 				  !meta->map.ptr->bypass_spec_v1, true);
9890 	return 0;
9891 }
9892 
9893 static int
9894 record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
9895 		int func_id, int insn_idx)
9896 {
9897 	struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
9898 	struct bpf_reg_state *reg;
9899 	struct bpf_map *map = meta->map.ptr;
9900 	u64 val, max;
9901 	int err;
9902 
9903 	if (func_id != BPF_FUNC_tail_call)
9904 		return 0;
9905 	if (!map || map->map_type != BPF_MAP_TYPE_PROG_ARRAY) {
9906 		verbose(env, "expected prog array map for tail call");
9907 		return -EINVAL;
9908 	}
9909 
9910 	reg = reg_state(env, BPF_REG_3);
9911 	val = reg->var_off.value;
9912 	max = map->max_entries;
9913 
9914 	if (!(is_reg_const(reg, false) && val < max)) {
9915 		bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
9916 		return 0;
9917 	}
9918 
9919 	err = mark_chain_precision(env, BPF_REG_3);
9920 	if (err)
9921 		return err;
9922 	if (bpf_map_key_unseen(aux))
9923 		bpf_map_key_store(aux, val);
9924 	else if (!bpf_map_key_poisoned(aux) &&
9925 		  bpf_map_key_immediate(aux) != val)
9926 		bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
9927 	return 0;
9928 }
9929 
9930 static int check_reference_leak(struct bpf_verifier_env *env, bool exception_exit)
9931 {
9932 	struct bpf_verifier_state *state = env->cur_state;
9933 	enum bpf_prog_type type = resolve_prog_type(env->prog);
9934 	struct bpf_reg_state *reg = reg_state(env, BPF_REG_0);
9935 	bool refs_lingering = false;
9936 	int i;
9937 
9938 	if (!exception_exit && cur_func(env)->frameno)
9939 		return 0;
9940 
9941 	for (i = 0; i < state->acquired_refs; i++) {
9942 		if (state->refs[i].type != REF_TYPE_PTR)
9943 			continue;
9944 		/* Allow struct_ops programs to return a referenced kptr back to
9945 		 * kernel. Type checks are performed later in check_return_code.
9946 		 */
9947 		if (type == BPF_PROG_TYPE_STRUCT_OPS && !exception_exit &&
9948 		    reg->id == state->refs[i].id)
9949 			continue;
9950 		verbose(env, "Unreleased reference id=%d alloc_insn=%d\n",
9951 			state->refs[i].id, state->refs[i].insn_idx);
9952 		refs_lingering = true;
9953 	}
9954 	return refs_lingering ? -EINVAL : 0;
9955 }
9956 
9957 static int check_resource_leak(struct bpf_verifier_env *env, bool exception_exit, bool check_lock, const char *prefix)
9958 {
9959 	int err;
9960 
9961 	if (check_lock && env->cur_state->active_locks) {
9962 		verbose(env, "%s cannot be used inside bpf_spin_lock-ed region\n", prefix);
9963 		return -EINVAL;
9964 	}
9965 
9966 	err = check_reference_leak(env, exception_exit);
9967 	if (err) {
9968 		verbose(env, "%s would lead to reference leak\n", prefix);
9969 		return err;
9970 	}
9971 
9972 	if (check_lock && env->cur_state->active_irq_id) {
9973 		verbose(env, "%s cannot be used inside bpf_local_irq_save-ed region\n", prefix);
9974 		return -EINVAL;
9975 	}
9976 
9977 	if (check_lock && env->cur_state->active_rcu_locks) {
9978 		verbose(env, "%s cannot be used inside bpf_rcu_read_lock-ed region\n", prefix);
9979 		return -EINVAL;
9980 	}
9981 
9982 	if (check_lock && env->cur_state->active_preempt_locks) {
9983 		verbose(env, "%s cannot be used inside bpf_preempt_disable-ed region\n", prefix);
9984 		return -EINVAL;
9985 	}
9986 
9987 	return 0;
9988 }
9989 
9990 static int check_bpf_snprintf_call(struct bpf_verifier_env *env,
9991 				   struct bpf_reg_state *regs)
9992 {
9993 	struct bpf_reg_state *fmt_reg = &regs[BPF_REG_3];
9994 	struct bpf_reg_state *data_len_reg = &regs[BPF_REG_5];
9995 	struct bpf_map *fmt_map = fmt_reg->map_ptr;
9996 	struct bpf_bprintf_data data = {};
9997 	int err, fmt_map_off, num_args;
9998 	u64 fmt_addr;
9999 	char *fmt;
10000 
10001 	/* data must be an array of u64 */
10002 	if (data_len_reg->var_off.value % 8)
10003 		return -EINVAL;
10004 	num_args = data_len_reg->var_off.value / 8;
10005 
10006 	/* fmt being ARG_PTR_TO_CONST_STR guarantees that var_off is const
10007 	 * and map_direct_value_addr is set.
10008 	 */
10009 	fmt_map_off = fmt_reg->var_off.value;
10010 	err = fmt_map->ops->map_direct_value_addr(fmt_map, &fmt_addr,
10011 						  fmt_map_off);
10012 	if (err) {
10013 		verbose(env, "failed to retrieve map value address\n");
10014 		return -EFAULT;
10015 	}
10016 	fmt = (char *)(long)fmt_addr + fmt_map_off;
10017 
10018 	/* We are also guaranteed that fmt+fmt_map_off is NULL terminated, we
10019 	 * can focus on validating the format specifiers.
10020 	 */
10021 	err = bpf_bprintf_prepare(fmt, UINT_MAX, NULL, num_args, &data);
10022 	if (err < 0)
10023 		verbose(env, "Invalid format string\n");
10024 
10025 	return err;
10026 }
10027 
10028 static int check_get_func_ip(struct bpf_verifier_env *env)
10029 {
10030 	enum bpf_prog_type type = resolve_prog_type(env->prog);
10031 	int func_id = BPF_FUNC_get_func_ip;
10032 
10033 	if (type == BPF_PROG_TYPE_TRACING) {
10034 		if (!bpf_prog_has_trampoline(env->prog)) {
10035 			verbose(env, "func %s#%d supported only for fentry/fexit/fsession/fmod_ret programs\n",
10036 				func_id_name(func_id), func_id);
10037 			return -ENOTSUPP;
10038 		}
10039 		return 0;
10040 	} else if (type == BPF_PROG_TYPE_KPROBE) {
10041 		return 0;
10042 	}
10043 
10044 	verbose(env, "func %s#%d not supported for program type %d\n",
10045 		func_id_name(func_id), func_id, type);
10046 	return -ENOTSUPP;
10047 }
10048 
10049 static struct bpf_insn_aux_data *cur_aux(const struct bpf_verifier_env *env)
10050 {
10051 	return &env->insn_aux_data[env->insn_idx];
10052 }
10053 
10054 static bool loop_flag_is_zero(struct bpf_verifier_env *env)
10055 {
10056 	struct bpf_reg_state *reg = reg_state(env, BPF_REG_4);
10057 	bool reg_is_null = bpf_register_is_null(reg);
10058 
10059 	if (reg_is_null)
10060 		mark_chain_precision(env, BPF_REG_4);
10061 
10062 	return reg_is_null;
10063 }
10064 
10065 static void update_loop_inline_state(struct bpf_verifier_env *env, u32 subprogno)
10066 {
10067 	struct bpf_loop_inline_state *state = &cur_aux(env)->loop_inline_state;
10068 
10069 	if (!state->initialized) {
10070 		state->initialized = 1;
10071 		state->fit_for_inline = loop_flag_is_zero(env);
10072 		state->callback_subprogno = subprogno;
10073 		return;
10074 	}
10075 
10076 	if (!state->fit_for_inline)
10077 		return;
10078 
10079 	state->fit_for_inline = (loop_flag_is_zero(env) &&
10080 				 state->callback_subprogno == subprogno);
10081 }
10082 
10083 /* Returns whether or not the given map can potentially elide
10084  * lookup return value nullness check. This is possible if the key
10085  * is statically known.
10086  */
10087 static bool can_elide_value_nullness(const struct bpf_map *map)
10088 {
10089 	if (map->map_flags & BPF_F_INNER_MAP)
10090 		return false;
10091 
10092 	switch (map->map_type) {
10093 	case BPF_MAP_TYPE_ARRAY:
10094 	case BPF_MAP_TYPE_PERCPU_ARRAY:
10095 		return true;
10096 	default:
10097 		return false;
10098 	}
10099 }
10100 
10101 int bpf_get_helper_proto(struct bpf_verifier_env *env, int func_id,
10102 			 const struct bpf_func_proto **ptr)
10103 {
10104 	if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID)
10105 		return -ERANGE;
10106 
10107 	if (!env->ops->get_func_proto)
10108 		return -EINVAL;
10109 
10110 	*ptr = env->ops->get_func_proto(func_id, env->prog);
10111 	return *ptr && (*ptr)->func ? 0 : -EINVAL;
10112 }
10113 
10114 /* Check if we're in a sleepable context. */
10115 static inline bool in_sleepable_context(struct bpf_verifier_env *env)
10116 {
10117 	return !env->cur_state->active_rcu_locks &&
10118 	       !env->cur_state->active_preempt_locks &&
10119 	       !env->cur_state->active_locks &&
10120 	       !env->cur_state->active_irq_id &&
10121 	       in_sleepable(env);
10122 }
10123 
10124 static const char *non_sleepable_context_description(struct bpf_verifier_env *env)
10125 {
10126 	if (env->cur_state->active_rcu_locks)
10127 		return "rcu_read_lock region";
10128 	if (env->cur_state->active_preempt_locks)
10129 		return "non-preemptible region";
10130 	if (env->cur_state->active_irq_id)
10131 		return "IRQ-disabled region";
10132 	if (env->cur_state->active_locks)
10133 		return "lock region";
10134 	return "non-sleepable prog";
10135 }
10136 
10137 static int release_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
10138 		       bool convert_rcu, bool release_dynptr)
10139 {
10140 	int err = -EINVAL;
10141 
10142 	if (bpf_register_is_null(reg))
10143 		return 0;
10144 
10145 	if (release_dynptr)
10146 		err = unmark_stack_slots_dynptr(env, reg);
10147 	else if (convert_rcu)
10148 		err = ref_convert_alloc_rcu_protected(env, reg->id);
10149 	else if (reg_is_referenced(env, reg))
10150 		err = release_reference(env, reg->id);
10151 
10152 	return err;
10153 }
10154 
/*
 * Verify one call to a BPF helper.
 *
 * @env:        verifier environment
 * @insn:       the call instruction; insn->imm is taken as the helper id
 * @insn_idx_p: dereferenced once to obtain the index of @insn
 *
 * The work happens in this order:
 *   1. look up the helper prototype and apply the GPL, fn->allowed() and
 *      sleepable-context checks;
 *   2. check every argument with check_func_arg() and record map/key info;
 *   3. release the register named by meta.release_regno, if any;
 *   4. run helper-specific checks (callbacks, bpf_loop, set_retval, ...);
 *   5. reset the caller-saved registers and build R0 from fn->ret_type;
 *   6. attach reference ids to R0, refine its range, and finish with the
 *      stack/func_ip/tail_call bookkeeping.
 *
 * Returns 0 on success or a negative errno.
 */
static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
			     int *insn_idx_p)
{
	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
	bool returns_cpu_specific_alloc_ptr = false;
	const struct bpf_func_proto *fn = NULL;
	enum bpf_return_type ret_type;
	enum bpf_type_flag ret_flag;
	struct bpf_reg_state *regs;
	struct bpf_call_arg_meta meta;
	int insn_idx = *insn_idx_p;
	bool changes_data;
	int i, err, func_id;

	/* find function prototype */
	func_id = insn->imm;
	err = bpf_get_helper_proto(env, insn->imm, &fn);
	/* -ERANGE is reported to the caller as -EINVAL with its own message */
	if (err == -ERANGE) {
		verbose(env, "invalid func %s#%d\n", func_id_name(func_id), func_id);
		return -EINVAL;
	}

	if (err) {
		verbose(env, "program of this type cannot use helper %s#%d\n",
			func_id_name(func_id), func_id);
		return err;
	}

	/* eBPF programs must be GPL compatible to use GPL-ed functions */
	if (!env->prog->gpl_compatible && fn->gpl_only) {
		verbose(env, "cannot call GPL-restricted function from non-GPL compatible program\n");
		return -EINVAL;
	}

	if (fn->allowed && !fn->allowed(env->prog)) {
		verbose(env, "helper call is not allowed in probe\n");
		return -EINVAL;
	}

	/* With LD_ABS/IND some JITs save/restore skb from r1. */
	changes_data = bpf_helper_changes_pkt_data(func_id);
	if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) {
		verifier_bug(env, "func %s#%d: r1 != ctx", func_id_name(func_id), func_id);
		return -EFAULT;
	}

	memset(&meta, 0, sizeof(meta));
	meta.pkt_access = fn->pkt_access;

	err = check_func_proto(fn, &meta);
	if (err) {
		verifier_bug(env, "incorrect func proto %s#%d", func_id_name(func_id), func_id);
		return err;
	}

	if (fn->might_sleep && !in_sleepable_context(env)) {
		verbose(env, "sleepable helper %s#%d in %s\n", func_id_name(func_id), func_id,
			non_sleepable_context_description(env));
		return -EINVAL;
	}

	/* Track non-sleepable context for helpers. */
	if (!in_sleepable_context(env))
		env->insn_aux_data[insn_idx].non_sleepable = true;

	meta.func_id = func_id;
	/* check args */
	for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) {
		err = check_func_arg(env, i, &meta, fn, insn_idx);
		if (err)
			return err;
	}

	err = record_func_map(env, &meta, func_id, insn_idx);
	if (err)
		return err;

	err = record_func_key(env, &meta, func_id, insn_idx);
	if (err)
		return err;

	regs = cur_regs(env);

	/* Mark slots with STACK_MISC in case of raw mode, stack offset
	 * is inferred from register state.
	 */
	for (i = 0; i < meta.access_size; i++) {
		err = check_mem_access(env, insn_idx, regs + meta.regno, argno_from_reg(meta.regno), i, BPF_B,
				       BPF_WRITE, -1, false, false);
		if (err)
			return err;
	}

	/* The argument checks above may have named a register to release. */
	if (meta.release_regno) {
		struct bpf_reg_state *reg = &regs[meta.release_regno];
		bool convert_rcu = (func_id == BPF_FUNC_kptr_xchg) && in_rcu_cs(env) &&
				   (reg->type & MEM_ALLOC) && (reg->type & MEM_PERCPU);

		err = release_reg(env, reg, convert_rcu, !!meta.dynptr.id);
		if (err)
			return err;
	}

	/*
	 * Helper-specific checks. Cases that only assign err and break are
	 * caught by the single "if (err)" test after the switch.
	 */
	switch (func_id) {
	case BPF_FUNC_tail_call:
		err = check_resource_leak(env, false, true, "tail_call");
		if (err)
			return err;
		break;
	case BPF_FUNC_get_local_storage:
		/* check that flags argument in get_local_storage(map, flags) is 0,
		 * this is required because get_local_storage() can't return an error.
		 */
		if (!bpf_register_is_null(&regs[BPF_REG_2])) {
			verbose(env, "get_local_storage() doesn't support non-zero flags\n");
			return -EINVAL;
		}
		break;
	case BPF_FUNC_for_each_map_elem:
		err = push_callback_call(env, insn, insn_idx, meta.subprogno,
					 set_map_elem_callback_state);
		break;
	case BPF_FUNC_timer_set_callback:
		err = push_callback_call(env, insn, insn_idx, meta.subprogno,
					 set_timer_callback_state);
		break;
	case BPF_FUNC_find_vma:
		err = push_callback_call(env, insn, insn_idx, meta.subprogno,
					 set_find_vma_callback_state);
		break;
	case BPF_FUNC_snprintf:
		err = check_bpf_snprintf_call(env, regs);
		break;
	case BPF_FUNC_loop:
		update_loop_inline_state(env, meta.subprogno);
		/* Verifier relies on R1 value to determine if bpf_loop() iteration
		 * is finished, thus mark it precise.
		 */
		err = mark_chain_precision(env, BPF_REG_1);
		if (err)
			return err;
		if (cur_func(env)->callback_depth < reg_umax(&regs[BPF_REG_1])) {
			err = push_callback_call(env, insn, insn_idx, meta.subprogno,
						 set_loop_callback_state);
		} else {
			cur_func(env)->callback_depth = 0;
			if (env->log.level & BPF_LOG_LEVEL2)
				verbose(env, "frame%d bpf_loop iteration limit reached\n",
					env->cur_state->curframe);
		}
		break;
	case BPF_FUNC_dynptr_from_mem:
		if (regs[BPF_REG_1].type != PTR_TO_MAP_VALUE) {
			verbose(env, "Unsupported reg type %s for bpf_dynptr_from_mem data\n",
				reg_type_str(env, regs[BPF_REG_1].type));
			return -EACCES;
		}
		break;
	case BPF_FUNC_set_retval:
	{
		/* Default allowed range; bpf_lsm_get_retval_range() may replace it below. */
		struct bpf_retval_range range = {
			.minval = -MAX_ERRNO,
			.maxval = 0,
			.return_32bit = true
		};
		struct bpf_reg_state *r1 = &regs[BPF_REG_1];

		if (r1->type != SCALAR_VALUE) {
			verbose(env, "R1 is not a scalar\n");
			return -EINVAL;
		}

		/* CGROUP_GETSOCKOPT is allowed to return arbitrary value */
		if (prog_type == BPF_PROG_TYPE_CGROUP_SOCKOPT &&
		    env->prog->expected_attach_type == BPF_CGROUP_GETSOCKOPT)
			break;

		if (prog_type == BPF_PROG_TYPE_LSM &&
		    env->prog->expected_attach_type == BPF_LSM_CGROUP) {
			if (!env->prog->aux->attach_func_proto->type) {
				/* Make sure programs that attach to void
				 * hooks don't try to modify return value.
				 */
				verbose(env, "BPF_LSM_CGROUP that attach to void LSM hooks can't modify return value!\n");
				return -EINVAL;
			}
			bpf_lsm_get_retval_range(env->prog, &range);
		}

		err = mark_chain_precision(env, BPF_REG_1);
		if (err)
			return err;

		if (!retval_range_within(range, r1)) {
			verbose_invalid_scalar(env, r1, range, "At bpf_set_retval", "R1");
			return -EINVAL;
		}

		break;
	}
	case BPF_FUNC_dynptr_write:
	{
		enum bpf_dynptr_type dynptr_type = meta.dynptr.type;

		if (dynptr_type == BPF_DYNPTR_TYPE_INVALID)
			return -EFAULT;

		if (dynptr_type == BPF_DYNPTR_TYPE_SKB ||
		    dynptr_type == BPF_DYNPTR_TYPE_SKB_META)
			/* this will trigger clear_all_pkt_pointers(), which will
			 * invalidate all dynptr slices associated with the skb
			 */
			changes_data = true;

		break;
	}
	case BPF_FUNC_per_cpu_ptr:
	case BPF_FUNC_this_cpu_ptr:
	{
		struct bpf_reg_state *reg = &regs[BPF_REG_1];
		const struct btf_type *type;

		if (reg->type & MEM_RCU) {
			type = btf_type_by_id(reg->btf, reg->btf_id);
			if (!type || !btf_type_is_struct(type)) {
				verbose(env, "Helper has invalid btf/btf_id in R1\n");
				return -EFAULT;
			}
			/* Consumed by the RET_PTR_TO_MEM_OR_BTF_ID case further down. */
			returns_cpu_specific_alloc_ptr = true;
			env->insn_aux_data[insn_idx].call_with_percpu_alloc_ptr = true;
		}
		break;
	}
	case BPF_FUNC_user_ringbuf_drain:
		err = push_callback_call(env, insn, insn_idx, meta.subprogno,
					 set_user_ringbuf_callback_state);
		break;
	}

	if (err)
		return err;

	/* reset caller saved regs */
	for (i = 0; i < CALLER_SAVED_REGS; i++) {
		bpf_mark_reg_not_init(env, &regs[caller_saved[i]]);
		check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
	}
	invalidate_outgoing_stack_args(env, cur_func(env));

	/* helper call returns 64-bit value. */
	regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;

	/* update return register (already marked as written above) */
	ret_type = fn->ret_type;
	ret_flag = type_flag(ret_type);

	switch (base_type(ret_type)) {
	case RET_INTEGER:
		/* sets type to SCALAR_VALUE */
		mark_reg_unknown(env, regs, BPF_REG_0);
		break;
	case RET_VOID:
		regs[BPF_REG_0].type = NOT_INIT;
		break;
	case RET_PTR_TO_MAP_VALUE:
		/* There is no offset yet applied, variable or fixed */
		mark_reg_known_zero(env, regs, BPF_REG_0);
		/* remember map_ptr, so that check_map_access()
		 * can check 'value_size' boundary of memory access
		 * to map element returned from bpf_map_lookup_elem()
		 */
		if (meta.map.ptr == NULL) {
			verifier_bug(env, "unexpected null map_ptr");
			return -EFAULT;
		}

		/*
		 * Drop PTR_MAYBE_NULL for a lookup whose constant key lies in
		 * [0, max_entries) on a map accepted by can_elide_value_nullness().
		 */
		if (func_id == BPF_FUNC_map_lookup_elem &&
		    can_elide_value_nullness(meta.map.ptr) &&
		    meta.const_map_key >= 0 &&
		    meta.const_map_key < meta.map.ptr->max_entries)
			ret_flag &= ~PTR_MAYBE_NULL;

		regs[BPF_REG_0].map_ptr = meta.map.ptr;
		regs[BPF_REG_0].map_uid = meta.map.uid;
		regs[BPF_REG_0].type = PTR_TO_MAP_VALUE | ret_flag;
		if (!type_may_be_null(ret_flag) &&
		    btf_record_has_field(meta.map.ptr->record, BPF_SPIN_LOCK | BPF_RES_SPIN_LOCK)) {
			regs[BPF_REG_0].id = ++env->id_gen;
		}
		break;
	case RET_PTR_TO_SOCKET:
		mark_reg_known_zero(env, regs, BPF_REG_0);
		regs[BPF_REG_0].type = PTR_TO_SOCKET | ret_flag;
		break;
	case RET_PTR_TO_SOCK_COMMON:
		mark_reg_known_zero(env, regs, BPF_REG_0);
		regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON | ret_flag;
		break;
	case RET_PTR_TO_TCP_SOCK:
		mark_reg_known_zero(env, regs, BPF_REG_0);
		regs[BPF_REG_0].type = PTR_TO_TCP_SOCK | ret_flag;
		break;
	case RET_PTR_TO_MEM:
		mark_reg_known_zero(env, regs, BPF_REG_0);
		regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag;
		regs[BPF_REG_0].mem_size = meta.mem_size;
		break;
	case RET_PTR_TO_MEM_OR_BTF_ID:
	{
		const struct btf_type *t;

		mark_reg_known_zero(env, regs, BPF_REG_0);
		t = btf_type_skip_modifiers(meta.ret_btf, meta.ret_btf_id, NULL);
		/* Non-struct types become sized PTR_TO_MEM, structs become PTR_TO_BTF_ID. */
		if (!btf_type_is_struct(t)) {
			u32 tsize;
			const struct btf_type *ret;
			const char *tname;

			/* resolve the type size of ksym. */
			ret = btf_resolve_size(meta.ret_btf, t, &tsize);
			if (IS_ERR(ret)) {
				tname = btf_name_by_offset(meta.ret_btf, t->name_off);
				verbose(env, "unable to resolve the size of type '%s': %ld\n",
					tname, PTR_ERR(ret));
				return -EINVAL;
			}
			regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag;
			regs[BPF_REG_0].mem_size = tsize;
		} else {
			if (returns_cpu_specific_alloc_ptr) {
				regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC | MEM_RCU;
			} else {
				/* MEM_RDONLY may be carried from ret_flag, but it
				 * doesn't apply on PTR_TO_BTF_ID. Fold it, otherwise
				 * it will confuse the check of PTR_TO_BTF_ID in
				 * check_mem_access().
				 */
				ret_flag &= ~MEM_RDONLY;
				regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag;
			}

			regs[BPF_REG_0].btf = meta.ret_btf;
			regs[BPF_REG_0].btf_id = meta.ret_btf_id;
		}
		break;
	}
	case RET_PTR_TO_BTF_ID:
	{
		struct btf *ret_btf;
		int ret_btf_id;

		mark_reg_known_zero(env, regs, BPF_REG_0);
		regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag;
		/* bpf_kptr_xchg takes its return type from the kptr field, not from fn. */
		if (func_id == BPF_FUNC_kptr_xchg) {
			ret_btf = meta.kptr_field->kptr.btf;
			ret_btf_id = meta.kptr_field->kptr.btf_id;
			if (!btf_is_kernel(ret_btf)) {
				regs[BPF_REG_0].type |= MEM_ALLOC;
				if (meta.kptr_field->type == BPF_KPTR_PERCPU)
					regs[BPF_REG_0].type |= MEM_PERCPU;
			}
		} else {
			if (fn->ret_btf_id == BPF_PTR_POISON) {
				verifier_bug(env, "func %s has non-overwritten BPF_PTR_POISON return type",
					     func_id_name(func_id));
				return -EFAULT;
			}
			ret_btf = btf_vmlinux;
			ret_btf_id = *fn->ret_btf_id;
		}
		if (ret_btf_id == 0) {
			verbose(env, "invalid return type %u of func %s#%d\n",
				base_type(ret_type), func_id_name(func_id),
				func_id);
			return -EINVAL;
		}
		regs[BPF_REG_0].btf = ret_btf;
		regs[BPF_REG_0].btf_id = ret_btf_id;
		break;
	}
	default:
		verbose(env, "unknown return type %u of func %s#%d\n",
			base_type(ret_type), func_id_name(func_id), func_id);
		return -EINVAL;
	}

	/* A maybe-NULL R0 gets a fresh id from env->id_gen. */
	if (type_may_be_null(regs[BPF_REG_0].type))
		regs[BPF_REG_0].id = ++env->id_gen;

	if (is_ptr_cast_function(func_id) &&
	    find_reference_state(env->cur_state, meta.ref_obj.id)) {
		struct bpf_verifier_state *branch;
		struct bpf_reg_state *r0;

		err = validate_ref_obj(env, &meta.ref_obj);
		if (err)
			return err;

		/*
		 * In order for a release of any of the original or cast pointers
		 * to invalidate all other pointers, reuse the same reference id for
		 * the cast result.
		 * This reference id can't be used for nullness propagation,
		 * as cast might return NULL for a non-NULL input.
		 * Hence, explore the NULL case as a separate branch.
		 */
		branch = push_stack(env, env->insn_idx + 1, env->insn_idx, false);
		if (IS_ERR(branch))
			return PTR_ERR(branch);

		/* In the pushed branch R0 is the scalar constant zero. */
		r0 = &branch->frame[branch->curframe]->regs[BPF_REG_0];
		__mark_reg_known_zero(r0);
		r0->type = SCALAR_VALUE;

		/* In the current state R0 is non-NULL and shares the reference id. */
		regs[BPF_REG_0].type &= ~PTR_MAYBE_NULL;
		regs[BPF_REG_0].id = meta.ref_obj.id;
	} else if (is_acquire_function(func_id, meta.map.ptr)) {
		int id = acquire_reference(env, insn_idx, 0);

		if (id < 0)
			return id;

		regs[BPF_REG_0].id = id;
	}

	if (func_id == BPF_FUNC_dynptr_data)
		regs[BPF_REG_0].parent_id = meta.dynptr.id;

	err = do_refine_retval_range(env, regs, fn->ret_type, func_id, &meta);
	if (err)
		return err;

	err = check_map_func_compatibility(env, meta.map.ptr, func_id);
	if (err)
		return err;

	/* Obtain callchain buffers once per program; has_callchain_buf records it. */
	if ((func_id == BPF_FUNC_get_stack ||
	     func_id == BPF_FUNC_get_task_stack) &&
	    !env->prog->has_callchain_buf) {
		const char *err_str;

#ifdef CONFIG_PERF_EVENTS
		err = get_callchain_buffers(sysctl_perf_event_max_stack);
		err_str = "cannot get callchain buffer for func %s#%d\n";
#else
		err = -ENOTSUPP;
		err_str = "func %s#%d not supported without CONFIG_PERF_EVENTS\n";
#endif
		if (err) {
			verbose(env, err_str, func_id_name(func_id), func_id);
			return err;
		}

		env->prog->has_callchain_buf = true;
	}

	if (func_id == BPF_FUNC_get_stackid || func_id == BPF_FUNC_get_stack)
		env->prog->call_get_stack = true;

	if (func_id == BPF_FUNC_get_func_ip) {
		if (check_get_func_ip(env))
			return -ENOTSUPP;
		env->prog->call_get_func_ip = true;
	}

	if (func_id == BPF_FUNC_tail_call) {
		/*
		 * Non-zero curframe: push a branch that continues at the next
		 * instruction, then run prepare_func_exit() on the current state.
		 * In frame 0 only the pending packet-pointer invalidation is
		 * cancelled.
		 */
		if (env->cur_state->curframe) {
			struct bpf_verifier_state *branch;

			mark_reg_scratched(env, BPF_REG_0);
			branch = push_stack(env, env->insn_idx + 1, env->insn_idx, false);
			if (IS_ERR(branch))
				return PTR_ERR(branch);
			clear_all_pkt_pointers(env);
			mark_reg_unknown(env, regs, BPF_REG_0);
			err = prepare_func_exit(env, &env->insn_idx);
			if (err)
				return err;
			/* NOTE(review): presumably offsets the caller's later insn_idx increment - confirm */
			env->insn_idx--;
		} else {
			changes_data = false;
		}
	}

	if (changes_data)
		clear_all_pkt_pointers(env);
	return 0;
}
10643 
10644 /* mark_btf_func_reg_size() is used when the reg size is determined by
10645  * the BTF func_proto's return value size and argument.
10646  */
10647 static void __mark_btf_func_reg_size(struct bpf_verifier_env *env, struct bpf_reg_state *regs,
10648 				     u32 regno, size_t reg_size)
10649 {
10650 	struct bpf_reg_state *reg = &regs[regno];
10651 
10652 	if (regno == BPF_REG_0) {
10653 		/* Function return value */
10654 		reg->subreg_def = reg_size == sizeof(u64) ?
10655 			DEF_NOT_SUBREG : env->insn_idx + 1;
10656 	} else if (reg_size == sizeof(u64)) {
10657 		/* Function argument */
10658 		mark_insn_zext(env, reg);
10659 	}
10660 }
10661 
10662 static void mark_btf_func_reg_size(struct bpf_verifier_env *env, u32 regno,
10663 				   size_t reg_size)
10664 {
10665 	return __mark_btf_func_reg_size(env, cur_regs(env), regno, reg_size);
10666 }
10667 
10668 static bool is_kfunc_acquire(struct bpf_kfunc_call_arg_meta *meta)
10669 {
10670 	return meta->kfunc_flags & KF_ACQUIRE;
10671 }
10672 
10673 static bool is_kfunc_release(struct bpf_kfunc_call_arg_meta *meta)
10674 {
10675 	return meta->kfunc_flags & KF_RELEASE;
10676 }
10677 
10678 static bool is_kfunc_destructive(struct bpf_kfunc_call_arg_meta *meta)
10679 {
10680 	return meta->kfunc_flags & KF_DESTRUCTIVE;
10681 }
10682 
10683 static bool is_kfunc_rcu(struct bpf_kfunc_call_arg_meta *meta)
10684 {
10685 	return meta->kfunc_flags & KF_RCU;
10686 }
10687 
10688 static bool is_kfunc_rcu_protected(struct bpf_kfunc_call_arg_meta *meta)
10689 {
10690 	return meta->kfunc_flags & KF_RCU_PROTECTED;
10691 }
10692 
10693 static bool is_kfunc_arg_mem_size(const struct btf *btf,
10694 				  const struct btf_param *arg,
10695 				  const struct bpf_reg_state *reg)
10696 {
10697 	const struct btf_type *t;
10698 
10699 	t = btf_type_skip_modifiers(btf, arg->type, NULL);
10700 	if (!btf_type_is_scalar(t) || reg->type != SCALAR_VALUE)
10701 		return false;
10702 
10703 	return btf_param_match_suffix(btf, arg, "__sz");
10704 }
10705 
10706 static bool is_kfunc_arg_const_mem_size(const struct btf *btf,
10707 					const struct btf_param *arg,
10708 					const struct bpf_reg_state *reg)
10709 {
10710 	const struct btf_type *t;
10711 
10712 	t = btf_type_skip_modifiers(btf, arg->type, NULL);
10713 	if (!btf_type_is_scalar(t) || reg->type != SCALAR_VALUE)
10714 		return false;
10715 
10716 	return btf_param_match_suffix(btf, arg, "__szk");
10717 }
10718 
/* Ask btf_param_match_suffix() whether @arg matches the "__k" suffix. */
static bool is_kfunc_arg_constant(const struct btf *btf, const struct btf_param *arg)
{
	const char *suffix = "__k";

	return btf_param_match_suffix(btf, arg, suffix);
}
10723 
/* Ask btf_param_match_suffix() whether @arg matches the "__ign" suffix. */
static bool is_kfunc_arg_ignore(const struct btf *btf, const struct btf_param *arg)
{
	const char *suffix = "__ign";

	return btf_param_match_suffix(btf, arg, suffix);
}
10728 
/* Ask btf_param_match_suffix() whether @arg matches the "__map" suffix. */
static bool is_kfunc_arg_map(const struct btf *btf, const struct btf_param *arg)
{
	const char *suffix = "__map";

	return btf_param_match_suffix(btf, arg, suffix);
}
10733 
/* Ask btf_param_match_suffix() whether @arg matches the "__alloc" suffix. */
static bool is_kfunc_arg_alloc_obj(const struct btf *btf, const struct btf_param *arg)
{
	const char *suffix = "__alloc";

	return btf_param_match_suffix(btf, arg, suffix);
}
10738 
/* Ask btf_param_match_suffix() whether @arg matches the "__uninit" suffix. */
static bool is_kfunc_arg_uninit(const struct btf *btf, const struct btf_param *arg)
{
	const char *suffix = "__uninit";

	return btf_param_match_suffix(btf, arg, suffix);
}
10743 
/* Ask btf_param_match_suffix() whether @arg matches the "__refcounted_kptr" suffix. */
static bool is_kfunc_arg_refcounted_kptr(const struct btf *btf, const struct btf_param *arg)
{
	const char *suffix = "__refcounted_kptr";

	return btf_param_match_suffix(btf, arg, suffix);
}
10748 
/* Ask btf_param_match_suffix() whether @arg matches the "__nullable" suffix. */
static bool is_kfunc_arg_nullable(const struct btf *btf, const struct btf_param *arg)
{
	const char *suffix = "__nullable";

	return btf_param_match_suffix(btf, arg, suffix);
}
10753 
/* Ask btf_param_match_suffix() whether @arg matches the "__nonown_allowed" suffix. */
static bool is_kfunc_arg_nonown_allowed(const struct btf *btf, const struct btf_param *arg)
{
	const char *suffix = "__nonown_allowed";

	return btf_param_match_suffix(btf, arg, suffix);
}
10758 
/* Ask btf_param_match_suffix() whether @arg matches the "__str" suffix. */
static bool is_kfunc_arg_const_str(const struct btf *btf, const struct btf_param *arg)
{
	const char *suffix = "__str";

	return btf_param_match_suffix(btf, arg, suffix);
}
10763 
/* Ask btf_param_match_suffix() whether @arg matches the "__irq_flag" suffix. */
static bool is_kfunc_arg_irq_flag(const struct btf *btf, const struct btf_param *arg)
{
	const char *suffix = "__irq_flag";

	return btf_param_match_suffix(btf, arg, suffix);
}
10768 
10769 static bool is_kfunc_arg_scalar_with_name(const struct btf *btf,
10770 					  const struct btf_param *arg,
10771 					  const char *name)
10772 {
10773 	int len, target_len = strlen(name);
10774 	const char *param_name;
10775 
10776 	param_name = btf_name_by_offset(btf, arg->name_off);
10777 	if (str_is_empty(param_name))
10778 		return false;
10779 	len = strlen(param_name);
10780 	if (len != target_len)
10781 		return false;
10782 	if (strcmp(param_name, name))
10783 		return false;
10784 
10785 	return true;
10786 }
10787 
/*
 * Indices into kf_arg_btf_ids[]. The enumerators must stay in the same
 * order as the BTF_ID() entries of that list.
 */
enum {
	KF_ARG_DYNPTR_ID,
	KF_ARG_LIST_HEAD_ID,
	KF_ARG_LIST_NODE_ID,
	KF_ARG_RB_ROOT_ID,
	KF_ARG_RB_NODE_ID,
	KF_ARG_WORKQUEUE_ID,
	KF_ARG_RES_SPIN_LOCK_ID,
	KF_ARG_TASK_WORK_ID,
	KF_ARG_PROG_AUX_ID,
	KF_ARG_TIMER_ID
};
10800 
/*
 * BTF ids of the structs recognised as special kfunc pointer arguments.
 * Indexed by the KF_ARG_*_ID enumerators, so entry order must match them.
 */
BTF_ID_LIST(kf_arg_btf_ids)
BTF_ID(struct, bpf_dynptr)
BTF_ID(struct, bpf_list_head)
BTF_ID(struct, bpf_list_node)
BTF_ID(struct, bpf_rb_root)
BTF_ID(struct, bpf_rb_node)
BTF_ID(struct, bpf_wq)
BTF_ID(struct, bpf_res_spin_lock)
BTF_ID(struct, bpf_task_work)
BTF_ID(struct, bpf_prog_aux)
BTF_ID(struct, bpf_timer)
10812 
10813 static bool __is_kfunc_ptr_arg_type(const struct btf *btf,
10814 				    const struct btf_param *arg, int type)
10815 {
10816 	const struct btf_type *t;
10817 	u32 res_id;
10818 
10819 	t = btf_type_skip_modifiers(btf, arg->type, NULL);
10820 	if (!t)
10821 		return false;
10822 	if (!btf_type_is_ptr(t))
10823 		return false;
10824 	t = btf_type_skip_modifiers(btf, t->type, &res_id);
10825 	if (!t)
10826 		return false;
10827 	return btf_types_are_same(btf, res_id, btf_vmlinux, kf_arg_btf_ids[type]);
10828 }
10829 
10830 static bool is_kfunc_arg_dynptr(const struct btf *btf, const struct btf_param *arg)
10831 {
10832 	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_DYNPTR_ID);
10833 }
10834 
10835 static bool is_kfunc_arg_list_head(const struct btf *btf, const struct btf_param *arg)
10836 {
10837 	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_LIST_HEAD_ID);
10838 }
10839 
10840 static bool is_kfunc_arg_list_node(const struct btf *btf, const struct btf_param *arg)
10841 {
10842 	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_LIST_NODE_ID);
10843 }
10844 
10845 static bool is_kfunc_arg_rbtree_root(const struct btf *btf, const struct btf_param *arg)
10846 {
10847 	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_RB_ROOT_ID);
10848 }
10849 
10850 static bool is_kfunc_arg_rbtree_node(const struct btf *btf, const struct btf_param *arg)
10851 {
10852 	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_RB_NODE_ID);
10853 }
10854 
10855 static bool is_kfunc_arg_timer(const struct btf *btf, const struct btf_param *arg)
10856 {
10857 	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_TIMER_ID);
10858 }
10859 
10860 static bool is_kfunc_arg_wq(const struct btf *btf, const struct btf_param *arg)
10861 {
10862 	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_WORKQUEUE_ID);
10863 }
10864 
10865 static bool is_kfunc_arg_task_work(const struct btf *btf, const struct btf_param *arg)
10866 {
10867 	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_TASK_WORK_ID);
10868 }
10869 
10870 static bool is_kfunc_arg_res_spin_lock(const struct btf *btf, const struct btf_param *arg)
10871 {
10872 	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_RES_SPIN_LOCK_ID);
10873 }
10874 
10875 static bool is_rbtree_node_type(const struct btf_type *t)
10876 {
10877 	return t == btf_type_by_id(btf_vmlinux, kf_arg_btf_ids[KF_ARG_RB_NODE_ID]);
10878 }
10879 
10880 static bool is_list_node_type(const struct btf_type *t)
10881 {
10882 	return t == btf_type_by_id(btf_vmlinux, kf_arg_btf_ids[KF_ARG_LIST_NODE_ID]);
10883 }
10884 
10885 static bool is_kfunc_arg_callback(struct bpf_verifier_env *env, const struct btf *btf,
10886 				  const struct btf_param *arg)
10887 {
10888 	const struct btf_type *t;
10889 
10890 	t = btf_type_resolve_func_ptr(btf, arg->type, NULL);
10891 	if (!t)
10892 		return false;
10893 
10894 	return true;
10895 }
10896 
10897 static bool is_kfunc_arg_prog_aux(const struct btf *btf, const struct btf_param *arg)
10898 {
10899 	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_PROG_AUX_ID);
10900 }
10901 
10902 /*
10903  * A kfunc with KF_IMPLICIT_ARGS has two prototypes in BTF:
10904  *   - the _impl prototype with full arg list (meta->func_proto)
10905  *   - the BPF API prototype w/o implicit args (func->type in BTF)
10906  * To determine whether an argument is implicit, we compare its position
10907  * against the number of arguments in the prototype w/o implicit args.
10908  */
10909 static bool is_kfunc_arg_implicit(const struct bpf_kfunc_call_arg_meta *meta, u32 arg_idx)
10910 {
10911 	const struct btf_type *func, *func_proto;
10912 	u32 argn;
10913 
10914 	if (!(meta->kfunc_flags & KF_IMPLICIT_ARGS))
10915 		return false;
10916 
10917 	func = btf_type_by_id(meta->btf, meta->func_id);
10918 	func_proto = btf_type_by_id(meta->btf, func->type);
10919 	argn = btf_type_vlen(func_proto);
10920 
10921 	return argn <= arg_idx;
10922 }
10923 
10924 /* Returns true if struct is composed of scalars, 4 levels of nesting allowed */
10925 static bool __btf_type_is_scalar_struct(struct bpf_verifier_env *env,
10926 					const struct btf *btf,
10927 					const struct btf_type *t, int rec)
10928 {
10929 	const struct btf_type *member_type;
10930 	const struct btf_member *member;
10931 	u32 i;
10932 
10933 	if (!btf_type_is_struct(t))
10934 		return false;
10935 
10936 	for_each_member(i, t, member) {
10937 		const struct btf_array *array;
10938 
10939 		member_type = btf_type_skip_modifiers(btf, member->type, NULL);
10940 		if (btf_type_is_struct(member_type)) {
10941 			if (rec >= 3) {
10942 				verbose(env, "max struct nesting depth exceeded\n");
10943 				return false;
10944 			}
10945 			if (!__btf_type_is_scalar_struct(env, btf, member_type, rec + 1))
10946 				return false;
10947 			continue;
10948 		}
10949 		if (btf_type_is_array(member_type)) {
10950 			array = btf_array(member_type);
10951 			if (!array->nelems)
10952 				return false;
10953 			member_type = btf_type_skip_modifiers(btf, array->type, NULL);
10954 			if (!btf_type_is_scalar(member_type))
10955 				return false;
10956 			continue;
10957 		}
10958 		if (!btf_type_is_scalar(member_type))
10959 			return false;
10960 	}
10961 	return true;
10962 }
10963 
/*
 * Kinds of pointer argument a kfunc parameter can be classified as.
 * NOTE(review): the code that assigns and consumes these values is outside
 * this chunk; the per-entry notes below are the original author's.
 */
enum kfunc_ptr_arg_type {
	KF_ARG_PTR_TO_CTX,
	KF_ARG_PTR_TO_ALLOC_BTF_ID,    /* Allocated object */
	KF_ARG_PTR_TO_REFCOUNTED_KPTR, /* Refcounted local kptr */
	KF_ARG_PTR_TO_DYNPTR,
	KF_ARG_PTR_TO_ITER,
	KF_ARG_PTR_TO_LIST_HEAD,
	KF_ARG_PTR_TO_LIST_NODE,
	KF_ARG_PTR_TO_BTF_ID,	       /* Also covers reg2btf_ids conversions */
	KF_ARG_PTR_TO_MEM,
	KF_ARG_PTR_TO_MEM_SIZE,	       /* Size derived from next argument, skip it */
	KF_ARG_PTR_TO_CALLBACK,
	KF_ARG_PTR_TO_RB_ROOT,
	KF_ARG_PTR_TO_RB_NODE,
	KF_ARG_PTR_TO_NULL,
	KF_ARG_PTR_TO_CONST_STR,
	KF_ARG_PTR_TO_MAP,
	KF_ARG_PTR_TO_TIMER,
	KF_ARG_PTR_TO_WORKQUEUE,
	KF_ARG_PTR_TO_IRQ_FLAG,
	KF_ARG_PTR_TO_RES_SPIN_LOCK,
	KF_ARG_PTR_TO_TASK_WORK,
};
10987 
/*
 * Indices into special_kfunc_list[]. Each KF_<name> enumerator must sit at
 * the same position as the matching BTF_ID(func, <name>) entry of that
 * list, so new entries have to be added to both in the same place.
 */
enum special_kfunc_type {
	KF_bpf_obj_new_impl,
	KF_bpf_obj_new,
	KF_bpf_obj_drop_impl,
	KF_bpf_obj_drop,
	KF_bpf_refcount_acquire_impl,
	KF_bpf_refcount_acquire,
	KF_bpf_list_push_front_impl,
	KF_bpf_list_push_front,
	KF_bpf_list_push_back_impl,
	KF_bpf_list_push_back,
	KF_bpf_list_add,
	KF_bpf_list_pop_front,
	KF_bpf_list_pop_back,
	KF_bpf_list_del,
	KF_bpf_list_front,
	KF_bpf_list_back,
	KF_bpf_list_is_first,
	KF_bpf_list_is_last,
	KF_bpf_list_empty,
	KF_bpf_cast_to_kern_ctx,
	KF_bpf_rdonly_cast,
	KF_bpf_rcu_read_lock,
	KF_bpf_rcu_read_unlock,
	KF_bpf_rbtree_remove,
	KF_bpf_rbtree_add_impl,
	KF_bpf_rbtree_add,
	KF_bpf_rbtree_first,
	KF_bpf_rbtree_root,
	KF_bpf_rbtree_left,
	KF_bpf_rbtree_right,
	KF_bpf_dynptr_from_skb,
	KF_bpf_dynptr_from_xdp,
	KF_bpf_dynptr_from_skb_meta,
	KF_bpf_xdp_pull_data,
	KF_bpf_dynptr_slice,
	KF_bpf_dynptr_slice_rdwr,
	KF_bpf_dynptr_clone,
	KF_bpf_percpu_obj_new_impl,
	KF_bpf_percpu_obj_new,
	KF_bpf_percpu_obj_drop_impl,
	KF_bpf_percpu_obj_drop,
	KF_bpf_throw,
	KF_bpf_wq_set_callback,
	KF_bpf_preempt_disable,
	KF_bpf_preempt_enable,
	KF_bpf_iter_css_task_new,
	KF_bpf_session_cookie,
	KF_bpf_get_kmem_cache,
	KF_bpf_local_irq_save,
	KF_bpf_local_irq_restore,
	KF_bpf_iter_num_new,
	KF_bpf_iter_num_next,
	KF_bpf_iter_num_destroy,
	KF_bpf_set_dentry_xattr,
	KF_bpf_remove_dentry_xattr,
	KF_bpf_res_spin_lock,
	KF_bpf_res_spin_unlock,
	KF_bpf_res_spin_lock_irqsave,
	KF_bpf_res_spin_unlock_irqrestore,
	KF_bpf_dynptr_from_file,
	KF_bpf_dynptr_file_discard,
	KF___bpf_trap,
	KF_bpf_task_work_schedule_signal,
	KF_bpf_task_work_schedule_resume,
	KF_bpf_arena_alloc_pages,
	KF_bpf_arena_free_pages,
	KF_bpf_arena_reserve_pages,
	KF_bpf_session_is_return,
	KF_bpf_stream_vprintk,
	KF_bpf_stream_print_stack,
};
11060 
/*
 * BTF ids of the kfuncs that get special treatment, indexed by
 * enum special_kfunc_type; entry order must match that enum.
 * Every #ifdef below supplies as many BTF_ID_UNUSED entries in its #else
 * arm as BTF_ID() entries in its main arm, so later positions do not shift
 * when the config option is disabled.
 */
BTF_ID_LIST(special_kfunc_list)
BTF_ID(func, bpf_obj_new_impl)
BTF_ID(func, bpf_obj_new)
BTF_ID(func, bpf_obj_drop_impl)
BTF_ID(func, bpf_obj_drop)
BTF_ID(func, bpf_refcount_acquire_impl)
BTF_ID(func, bpf_refcount_acquire)
BTF_ID(func, bpf_list_push_front_impl)
BTF_ID(func, bpf_list_push_front)
BTF_ID(func, bpf_list_push_back_impl)
BTF_ID(func, bpf_list_push_back)
BTF_ID(func, bpf_list_add)
BTF_ID(func, bpf_list_pop_front)
BTF_ID(func, bpf_list_pop_back)
BTF_ID(func, bpf_list_del)
BTF_ID(func, bpf_list_front)
BTF_ID(func, bpf_list_back)
BTF_ID(func, bpf_list_is_first)
BTF_ID(func, bpf_list_is_last)
BTF_ID(func, bpf_list_empty)
BTF_ID(func, bpf_cast_to_kern_ctx)
BTF_ID(func, bpf_rdonly_cast)
BTF_ID(func, bpf_rcu_read_lock)
BTF_ID(func, bpf_rcu_read_unlock)
BTF_ID(func, bpf_rbtree_remove)
BTF_ID(func, bpf_rbtree_add_impl)
BTF_ID(func, bpf_rbtree_add)
BTF_ID(func, bpf_rbtree_first)
BTF_ID(func, bpf_rbtree_root)
BTF_ID(func, bpf_rbtree_left)
BTF_ID(func, bpf_rbtree_right)
#ifdef CONFIG_NET
BTF_ID(func, bpf_dynptr_from_skb)
BTF_ID(func, bpf_dynptr_from_xdp)
BTF_ID(func, bpf_dynptr_from_skb_meta)
BTF_ID(func, bpf_xdp_pull_data)
#else
BTF_ID_UNUSED
BTF_ID_UNUSED
BTF_ID_UNUSED
BTF_ID_UNUSED
#endif
BTF_ID(func, bpf_dynptr_slice)
BTF_ID(func, bpf_dynptr_slice_rdwr)
BTF_ID(func, bpf_dynptr_clone)
BTF_ID(func, bpf_percpu_obj_new_impl)
BTF_ID(func, bpf_percpu_obj_new)
BTF_ID(func, bpf_percpu_obj_drop_impl)
BTF_ID(func, bpf_percpu_obj_drop)
BTF_ID(func, bpf_throw)
BTF_ID(func, bpf_wq_set_callback)
BTF_ID(func, bpf_preempt_disable)
BTF_ID(func, bpf_preempt_enable)
#ifdef CONFIG_CGROUPS
BTF_ID(func, bpf_iter_css_task_new)
#else
BTF_ID_UNUSED
#endif
#ifdef CONFIG_BPF_EVENTS
BTF_ID(func, bpf_session_cookie)
#else
BTF_ID_UNUSED
#endif
BTF_ID(func, bpf_get_kmem_cache)
BTF_ID(func, bpf_local_irq_save)
BTF_ID(func, bpf_local_irq_restore)
BTF_ID(func, bpf_iter_num_new)
BTF_ID(func, bpf_iter_num_next)
BTF_ID(func, bpf_iter_num_destroy)
#ifdef CONFIG_BPF_LSM
BTF_ID(func, bpf_set_dentry_xattr)
BTF_ID(func, bpf_remove_dentry_xattr)
#else
BTF_ID_UNUSED
BTF_ID_UNUSED
#endif
BTF_ID(func, bpf_res_spin_lock)
BTF_ID(func, bpf_res_spin_unlock)
BTF_ID(func, bpf_res_spin_lock_irqsave)
BTF_ID(func, bpf_res_spin_unlock_irqrestore)
BTF_ID(func, bpf_dynptr_from_file)
BTF_ID(func, bpf_dynptr_file_discard)
BTF_ID(func, __bpf_trap)
BTF_ID(func, bpf_task_work_schedule_signal)
BTF_ID(func, bpf_task_work_schedule_resume)
BTF_ID(func, bpf_arena_alloc_pages)
BTF_ID(func, bpf_arena_free_pages)
BTF_ID(func, bpf_arena_reserve_pages)
#ifdef CONFIG_BPF_EVENTS
BTF_ID(func, bpf_session_is_return)
#else
BTF_ID_UNUSED
#endif
BTF_ID(func, bpf_stream_vprintk)
BTF_ID(func, bpf_stream_print_stack)
11156 
11157 static bool is_bpf_obj_new_kfunc(u32 func_id)
11158 {
11159 	return func_id == special_kfunc_list[KF_bpf_obj_new] ||
11160 	       func_id == special_kfunc_list[KF_bpf_obj_new_impl];
11161 }
11162 
11163 static bool is_bpf_percpu_obj_new_kfunc(u32 func_id)
11164 {
11165 	return func_id == special_kfunc_list[KF_bpf_percpu_obj_new] ||
11166 	       func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl];
11167 }
11168 
11169 static bool is_bpf_obj_drop_kfunc(u32 func_id)
11170 {
11171 	return func_id == special_kfunc_list[KF_bpf_obj_drop] ||
11172 	       func_id == special_kfunc_list[KF_bpf_obj_drop_impl];
11173 }
11174 
11175 static bool is_bpf_percpu_obj_drop_kfunc(u32 func_id)
11176 {
11177 	return func_id == special_kfunc_list[KF_bpf_percpu_obj_drop] ||
11178 	       func_id == special_kfunc_list[KF_bpf_percpu_obj_drop_impl];
11179 }
11180 
11181 static bool is_bpf_refcount_acquire_kfunc(u32 func_id)
11182 {
11183 	return func_id == special_kfunc_list[KF_bpf_refcount_acquire] ||
11184 	       func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl];
11185 }
11186 
11187 static bool is_bpf_list_push_kfunc(u32 func_id)
11188 {
11189 	return func_id == special_kfunc_list[KF_bpf_list_push_front] ||
11190 	       func_id == special_kfunc_list[KF_bpf_list_push_front_impl] ||
11191 	       func_id == special_kfunc_list[KF_bpf_list_push_back] ||
11192 	       func_id == special_kfunc_list[KF_bpf_list_push_back_impl] ||
11193 	       func_id == special_kfunc_list[KF_bpf_list_add];
11194 }
11195 
11196 static bool is_bpf_rbtree_add_kfunc(u32 func_id)
11197 {
11198 	return func_id == special_kfunc_list[KF_bpf_rbtree_add] ||
11199 	       func_id == special_kfunc_list[KF_bpf_rbtree_add_impl];
11200 }
11201 
11202 static bool is_task_work_add_kfunc(u32 func_id)
11203 {
11204 	return func_id == special_kfunc_list[KF_bpf_task_work_schedule_signal] ||
11205 	       func_id == special_kfunc_list[KF_bpf_task_work_schedule_resume];
11206 }
11207 
11208 static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta)
11209 {
11210 	if (is_bpf_refcount_acquire_kfunc(meta->func_id) && meta->arg_owning_ref)
11211 		return false;
11212 
11213 	return meta->kfunc_flags & KF_RET_NULL;
11214 }
11215 
11216 static bool is_kfunc_bpf_rcu_read_lock(struct bpf_kfunc_call_arg_meta *meta)
11217 {
11218 	return meta->func_id == special_kfunc_list[KF_bpf_rcu_read_lock];
11219 }
11220 
11221 static bool is_kfunc_bpf_rcu_read_unlock(struct bpf_kfunc_call_arg_meta *meta)
11222 {
11223 	return meta->func_id == special_kfunc_list[KF_bpf_rcu_read_unlock];
11224 }
11225 
11226 static bool is_kfunc_bpf_preempt_disable(struct bpf_kfunc_call_arg_meta *meta)
11227 {
11228 	return meta->func_id == special_kfunc_list[KF_bpf_preempt_disable];
11229 }
11230 
11231 static bool is_kfunc_bpf_preempt_enable(struct bpf_kfunc_call_arg_meta *meta)
11232 {
11233 	return meta->func_id == special_kfunc_list[KF_bpf_preempt_enable];
11234 }
11235 
11236 bool bpf_is_kfunc_pkt_changing(struct bpf_kfunc_call_arg_meta *meta)
11237 {
11238 	return meta->func_id == special_kfunc_list[KF_bpf_xdp_pull_data];
11239 }
11240 
11241 static enum kfunc_ptr_arg_type
11242 get_kfunc_ptr_arg_type(struct bpf_verifier_env *env, struct bpf_func_state *caller,
11243 		       struct bpf_reg_state *regs, struct bpf_kfunc_call_arg_meta *meta,
11244 		       const struct btf_type *t, const struct btf_type *ref_t,
11245 		       const char *ref_tname, const struct btf_param *args,
11246 		       int arg, int nargs, argno_t argno, struct bpf_reg_state *reg)
11247 {
11248 	bool arg_mem_size = false;
11249 
11250 	if (meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx] ||
11251 	    meta->func_id == special_kfunc_list[KF_bpf_session_is_return] ||
11252 	    meta->func_id == special_kfunc_list[KF_bpf_session_cookie])
11253 		return KF_ARG_PTR_TO_CTX;
11254 
11255 	if (arg + 1 < nargs &&
11256 	    (is_kfunc_arg_mem_size(meta->btf, &args[arg + 1], get_func_arg_reg(caller, regs, arg + 1)) ||
11257 	     is_kfunc_arg_const_mem_size(meta->btf, &args[arg + 1], get_func_arg_reg(caller, regs, arg + 1))))
11258 		arg_mem_size = true;
11259 
11260 	/* In this function, we verify the kfunc's BTF as per the argument type,
11261 	 * leaving the rest of the verification with respect to the register
11262 	 * type to our caller. When a set of conditions hold in the BTF type of
11263 	 * arguments, we resolve it to a known kfunc_ptr_arg_type.
11264 	 */
11265 	if (btf_is_prog_ctx_type(&env->log, meta->btf, t, resolve_prog_type(env->prog), arg))
11266 		return KF_ARG_PTR_TO_CTX;
11267 
11268 	if (is_kfunc_arg_nullable(meta->btf, &args[arg]) && bpf_register_is_null(reg) &&
11269 	    !arg_mem_size)
11270 		return KF_ARG_PTR_TO_NULL;
11271 
11272 	if (is_kfunc_arg_alloc_obj(meta->btf, &args[arg]))
11273 		return KF_ARG_PTR_TO_ALLOC_BTF_ID;
11274 
11275 	if (is_kfunc_arg_refcounted_kptr(meta->btf, &args[arg]))
11276 		return KF_ARG_PTR_TO_REFCOUNTED_KPTR;
11277 
11278 	if (is_kfunc_arg_dynptr(meta->btf, &args[arg]))
11279 		return KF_ARG_PTR_TO_DYNPTR;
11280 
11281 	if (is_kfunc_arg_iter(meta, arg, &args[arg]))
11282 		return KF_ARG_PTR_TO_ITER;
11283 
11284 	if (is_kfunc_arg_list_head(meta->btf, &args[arg]))
11285 		return KF_ARG_PTR_TO_LIST_HEAD;
11286 
11287 	if (is_kfunc_arg_list_node(meta->btf, &args[arg]))
11288 		return KF_ARG_PTR_TO_LIST_NODE;
11289 
11290 	if (is_kfunc_arg_rbtree_root(meta->btf, &args[arg]))
11291 		return KF_ARG_PTR_TO_RB_ROOT;
11292 
11293 	if (is_kfunc_arg_rbtree_node(meta->btf, &args[arg]))
11294 		return KF_ARG_PTR_TO_RB_NODE;
11295 
11296 	if (is_kfunc_arg_const_str(meta->btf, &args[arg]))
11297 		return KF_ARG_PTR_TO_CONST_STR;
11298 
11299 	if (is_kfunc_arg_map(meta->btf, &args[arg]))
11300 		return KF_ARG_PTR_TO_MAP;
11301 
11302 	if (is_kfunc_arg_wq(meta->btf, &args[arg]))
11303 		return KF_ARG_PTR_TO_WORKQUEUE;
11304 
11305 	if (is_kfunc_arg_timer(meta->btf, &args[arg]))
11306 		return KF_ARG_PTR_TO_TIMER;
11307 
11308 	if (is_kfunc_arg_task_work(meta->btf, &args[arg]))
11309 		return KF_ARG_PTR_TO_TASK_WORK;
11310 
11311 	if (is_kfunc_arg_irq_flag(meta->btf, &args[arg]))
11312 		return KF_ARG_PTR_TO_IRQ_FLAG;
11313 
11314 	if (is_kfunc_arg_res_spin_lock(meta->btf, &args[arg]))
11315 		return KF_ARG_PTR_TO_RES_SPIN_LOCK;
11316 
11317 	if ((base_type(reg->type) == PTR_TO_BTF_ID || reg2btf_ids[base_type(reg->type)])) {
11318 		if (!btf_type_is_struct(ref_t)) {
11319 			verbose(env, "kernel function %s %s pointer type %s %s is not supported\n",
11320 				meta->func_name, reg_arg_name(env, argno),
11321 				btf_type_str(ref_t), ref_tname);
11322 			return -EINVAL;
11323 		}
11324 		return KF_ARG_PTR_TO_BTF_ID;
11325 	}
11326 
11327 	if (is_kfunc_arg_callback(env, meta->btf, &args[arg]))
11328 		return KF_ARG_PTR_TO_CALLBACK;
11329 
11330 	/* This is the catch all argument type of register types supported by
11331 	 * check_helper_mem_access. However, we only allow when argument type is
11332 	 * pointer to scalar, or struct composed (recursively) of scalars. When
11333 	 * arg_mem_size is true, the pointer can be void *.
11334 	 */
11335 	if (!btf_type_is_scalar(ref_t) && !__btf_type_is_scalar_struct(env, meta->btf, ref_t, 0) &&
11336 	    (arg_mem_size ? !btf_type_is_void(ref_t) : 1)) {
11337 		verbose(env, "%s pointer type %s %s must point to %sscalar, or struct with scalar\n",
11338 			reg_arg_name(env, argno),
11339 			btf_type_str(ref_t), ref_tname, arg_mem_size ? "void, " : "");
11340 		return -EINVAL;
11341 	}
11342 	return arg_mem_size ? KF_ARG_PTR_TO_MEM_SIZE : KF_ARG_PTR_TO_MEM;
11343 }
11344 
11345 static int process_kf_arg_ptr_to_btf_id(struct bpf_verifier_env *env,
11346 					struct bpf_reg_state *reg,
11347 					const struct btf_type *ref_t,
11348 					const char *ref_tname, u32 ref_id,
11349 					struct bpf_kfunc_call_arg_meta *meta,
11350 					int arg, argno_t argno)
11351 {
11352 	const struct btf_type *reg_ref_t;
11353 	bool strict_type_match = false;
11354 	const struct btf *reg_btf;
11355 	const char *reg_ref_tname;
11356 	bool taking_projection;
11357 	bool struct_same;
11358 	u32 reg_ref_id;
11359 
11360 	if (base_type(reg->type) == PTR_TO_BTF_ID) {
11361 		reg_btf = reg->btf;
11362 		reg_ref_id = reg->btf_id;
11363 	} else {
11364 		reg_btf = btf_vmlinux;
11365 		reg_ref_id = *reg2btf_ids[base_type(reg->type)];
11366 	}
11367 
11368 	/* Enforce strict type matching for calls to kfuncs that are acquiring
11369 	 * or releasing a reference, or are no-cast aliases. We do _not_
11370 	 * enforce strict matching for kfuncs by default,
11371 	 * as we want to enable BPF programs to pass types that are bitwise
11372 	 * equivalent without forcing them to explicitly cast with something
11373 	 * like bpf_cast_to_kern_ctx().
11374 	 *
11375 	 * For example, say we had a type like the following:
11376 	 *
11377 	 * struct bpf_cpumask {
11378 	 *	cpumask_t cpumask;
11379 	 *	refcount_t usage;
11380 	 * };
11381 	 *
11382 	 * Note that as specified in <linux/cpumask.h>, cpumask_t is typedef'ed
11383 	 * to a struct cpumask, so it would be safe to pass a struct
11384 	 * bpf_cpumask * to a kfunc expecting a struct cpumask *.
11385 	 *
11386 	 * The philosophy here is similar to how we allow scalars of different
11387 	 * types to be passed to kfuncs as long as the size is the same. The
11388 	 * only difference here is that we're simply allowing
11389 	 * btf_struct_ids_match() to walk the struct at the 0th offset, and
11390 	 * resolve types.
11391 	 */
11392 	if ((is_kfunc_release(meta) && reg_is_referenced(env, reg)) ||
11393 	    btf_type_ids_nocast_alias(&env->log, reg_btf, reg_ref_id, meta->btf, ref_id))
11394 		strict_type_match = true;
11395 
11396 	WARN_ON_ONCE(is_kfunc_release(meta) && !tnum_is_const(reg->var_off));
11397 
11398 	reg_ref_t = btf_type_skip_modifiers(reg_btf, reg_ref_id, &reg_ref_id);
11399 	reg_ref_tname = btf_name_by_offset(reg_btf, reg_ref_t->name_off);
11400 	struct_same = btf_struct_ids_match(&env->log, reg_btf, reg_ref_id, reg->var_off.value,
11401 					   meta->btf, ref_id, strict_type_match);
11402 	/* If kfunc is accepting a projection type (ie. __sk_buff), it cannot
11403 	 * actually use it -- it must cast to the underlying type. So we allow
11404 	 * caller to pass in the underlying type.
11405 	 */
11406 	taking_projection = btf_is_projection_of(ref_tname, reg_ref_tname);
11407 	if (!taking_projection && !struct_same) {
11408 		verbose(env, "kernel function %s %s expected pointer to %s %s but %s has a pointer to %s %s\n",
11409 			meta->func_name, reg_arg_name(env, argno),
11410 			btf_type_str(ref_t), ref_tname, reg_arg_name(env, argno),
11411 			btf_type_str(reg_ref_t), reg_ref_tname);
11412 		return -EINVAL;
11413 	}
11414 	return 0;
11415 }
11416 
11417 static int process_irq_flag(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno,
11418 			     struct bpf_kfunc_call_arg_meta *meta)
11419 {
11420 	int err, spi, kfunc_class = IRQ_NATIVE_KFUNC;
11421 	bool irq_save;
11422 
11423 	if (meta->func_id == special_kfunc_list[KF_bpf_local_irq_save] ||
11424 	    meta->func_id == special_kfunc_list[KF_bpf_res_spin_lock_irqsave]) {
11425 		irq_save = true;
11426 		if (meta->func_id == special_kfunc_list[KF_bpf_res_spin_lock_irqsave])
11427 			kfunc_class = IRQ_LOCK_KFUNC;
11428 	} else if (meta->func_id == special_kfunc_list[KF_bpf_local_irq_restore] ||
11429 		   meta->func_id == special_kfunc_list[KF_bpf_res_spin_unlock_irqrestore]) {
11430 		irq_save = false;
11431 		if (meta->func_id == special_kfunc_list[KF_bpf_res_spin_unlock_irqrestore])
11432 			kfunc_class = IRQ_LOCK_KFUNC;
11433 	} else {
11434 		verifier_bug(env, "unknown irq flags kfunc");
11435 		return -EFAULT;
11436 	}
11437 
11438 	if (irq_save) {
11439 		if (!is_irq_flag_reg_valid_uninit(env, reg)) {
11440 			verbose(env, "expected uninitialized irq flag as %s\n",
11441 				reg_arg_name(env, argno));
11442 			return -EINVAL;
11443 		}
11444 
11445 		err = check_mem_access(env, env->insn_idx, reg, argno, 0, BPF_DW,
11446 				       BPF_WRITE, -1, false, false);
11447 		if (err)
11448 			return err;
11449 
11450 		err = mark_stack_slot_irq_flag(env, meta, reg, env->insn_idx, kfunc_class);
11451 		if (err)
11452 			return err;
11453 	} else {
11454 		err = is_irq_flag_reg_valid_init(env, reg);
11455 		if (err) {
11456 			verbose(env, "expected an initialized irq flag as %s\n",
11457 				reg_arg_name(env, argno));
11458 			return err;
11459 		}
11460 
11461 		spi = irq_flag_get_spi(env, reg);
11462 		if (spi < 0)
11463 			return spi;
11464 
11465 		mark_stack_slots_scratched(env, spi, 1);
11466 
11467 		err = unmark_stack_slot_irq_flag(env, reg, kfunc_class);
11468 		if (err)
11469 			return err;
11470 	}
11471 	return 0;
11472 }
11473 
11474 
11475 static int ref_set_non_owning(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
11476 {
11477 	struct btf_record *rec = reg_btf_record(reg);
11478 
11479 	if (!env->cur_state->active_locks) {
11480 		verifier_bug(env, "%s w/o active lock", __func__);
11481 		return -EFAULT;
11482 	}
11483 
11484 	if (type_flag(reg->type) & NON_OWN_REF) {
11485 		verifier_bug(env, "NON_OWN_REF already set");
11486 		return -EFAULT;
11487 	}
11488 
11489 	reg->type |= NON_OWN_REF;
11490 	if (rec->refcount_off >= 0)
11491 		reg->type |= MEM_RCU;
11492 
11493 	return 0;
11494 }
11495 
11496 static void ref_convert_owning_non_owning(struct bpf_verifier_env *env, u32 id)
11497 {
11498 	struct bpf_func_state *unused;
11499 	struct bpf_reg_state *reg;
11500 
11501 	WARN_ON_ONCE(release_reference_nomark(env->cur_state, id));
11502 
11503 	bpf_for_each_reg_in_vstate(env->cur_state, unused, reg, ({
11504 		if (reg->id == id) {
11505 			reg->id = 0;
11506 			ref_set_non_owning(env, reg);
11507 		}
11508 	}));
11509 
11510 	return;
11511 }
11512 
11513 /* Implementation details:
11514  *
11515  * Each register points to some region of memory, which we define as an
11516  * allocation. Each allocation may embed a bpf_spin_lock which protects any
11517  * special BPF objects (bpf_list_head, bpf_rb_root, etc.) part of the same
11518  * allocation. The lock and the data it protects are colocated in the same
11519  * memory region.
11520  *
11521  * Hence, every time a register holds a pointer value pointing to such an
11522  * allocation, the verifier preserves a unique reg->id for it.
11523  *
11524  * The verifier remembers the lock 'ptr' and the lock 'id' whenever
11525  * bpf_spin_lock is called.
11526  *
11527  * To enable this, lock state in the verifier captures two values:
11528  *	active_lock.ptr = Register's type specific pointer
11529  *	active_lock.id  = A unique ID for each register pointer value
11530  *
11531  * Currently, PTR_TO_MAP_VALUE and PTR_TO_BTF_ID | MEM_ALLOC are the two
11532  * supported register types.
11533  *
11534  * The active_lock.ptr in case of map values is the reg->map_ptr, and in case of
11535  * allocated objects is the reg->btf pointer.
11536  *
11537  * The active_lock.id is non-unique for maps supporting direct_value_addr, as we
11538  * can establish the provenance of the map value statically for each distinct
11539  * lookup into such maps. They always contain a single map value hence unique
11540  * IDs for each pseudo load pessimizes the algorithm and rejects valid programs.
11541  *
11542  * So, in case of global variables, they use array maps with max_entries = 1,
11543  * hence their active_lock.ptr becomes map_ptr and id = 0 (since they all point
11544  * into the same map value as max_entries is 1, as described above).
11545  *
11546  * In case of inner map lookups, the inner map pointer has same map_ptr as the
11547  * outer map pointer (in verifier context), but each lookup into an inner map
11548  * assigns a fresh reg->id to the lookup, so while lookups into distinct inner
11549  * maps from the same outer map share the same map_ptr as active_lock.ptr, they
11550  * will get different reg->id assigned to each lookup, hence different
11551  * active_lock.id.
11552  *
11553  * In case of allocated objects, active_lock.ptr is the reg->btf, and the
11554  * reg->id is a unique ID preserved after the NULL pointer check on the pointer
11555  * returned from bpf_obj_new. Each allocation receives a new reg->id.
11556  */
11557 static int check_reg_allocation_locked(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
11558 {
11559 	struct bpf_reference_state *s;
11560 	void *ptr;
11561 	u32 id;
11562 
11563 	switch ((int)reg->type) {
11564 	case PTR_TO_MAP_VALUE:
11565 		ptr = reg->map_ptr;
11566 		break;
11567 	case PTR_TO_BTF_ID | MEM_ALLOC:
11568 		ptr = reg->btf;
11569 		break;
11570 	default:
11571 		verifier_bug(env, "unknown reg type for lock check");
11572 		return -EFAULT;
11573 	}
11574 	id = reg->id;
11575 
11576 	if (!env->cur_state->active_locks)
11577 		return -EINVAL;
11578 	s = find_lock_state(env->cur_state, REF_TYPE_LOCK_MASK, id, ptr);
11579 	if (!s) {
11580 		verbose(env, "held lock and object are not in the same allocation\n");
11581 		return -EINVAL;
11582 	}
11583 	return 0;
11584 }
11585 
11586 static bool is_bpf_list_api_kfunc(u32 btf_id)
11587 {
11588 	return is_bpf_list_push_kfunc(btf_id) ||
11589 	       btf_id == special_kfunc_list[KF_bpf_list_pop_front] ||
11590 	       btf_id == special_kfunc_list[KF_bpf_list_pop_back] ||
11591 	       btf_id == special_kfunc_list[KF_bpf_list_del] ||
11592 	       btf_id == special_kfunc_list[KF_bpf_list_front] ||
11593 	       btf_id == special_kfunc_list[KF_bpf_list_back] ||
11594 	       btf_id == special_kfunc_list[KF_bpf_list_is_first] ||
11595 	       btf_id == special_kfunc_list[KF_bpf_list_is_last] ||
11596 	       btf_id == special_kfunc_list[KF_bpf_list_empty];
11597 }
11598 
11599 static bool is_bpf_rbtree_api_kfunc(u32 btf_id)
11600 {
11601 	return is_bpf_rbtree_add_kfunc(btf_id) ||
11602 	       btf_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
11603 	       btf_id == special_kfunc_list[KF_bpf_rbtree_first] ||
11604 	       btf_id == special_kfunc_list[KF_bpf_rbtree_root] ||
11605 	       btf_id == special_kfunc_list[KF_bpf_rbtree_left] ||
11606 	       btf_id == special_kfunc_list[KF_bpf_rbtree_right];
11607 }
11608 
11609 static bool is_bpf_iter_num_api_kfunc(u32 btf_id)
11610 {
11611 	return btf_id == special_kfunc_list[KF_bpf_iter_num_new] ||
11612 	       btf_id == special_kfunc_list[KF_bpf_iter_num_next] ||
11613 	       btf_id == special_kfunc_list[KF_bpf_iter_num_destroy];
11614 }
11615 
11616 static bool is_bpf_graph_api_kfunc(u32 btf_id)
11617 {
11618 	return is_bpf_list_api_kfunc(btf_id) ||
11619 	       is_bpf_rbtree_api_kfunc(btf_id) ||
11620 	       is_bpf_refcount_acquire_kfunc(btf_id);
11621 }
11622 
11623 static bool is_bpf_res_spin_lock_kfunc(u32 btf_id)
11624 {
11625 	return btf_id == special_kfunc_list[KF_bpf_res_spin_lock] ||
11626 	       btf_id == special_kfunc_list[KF_bpf_res_spin_unlock] ||
11627 	       btf_id == special_kfunc_list[KF_bpf_res_spin_lock_irqsave] ||
11628 	       btf_id == special_kfunc_list[KF_bpf_res_spin_unlock_irqrestore];
11629 }
11630 
11631 static bool is_bpf_arena_kfunc(u32 btf_id)
11632 {
11633 	return btf_id == special_kfunc_list[KF_bpf_arena_alloc_pages] ||
11634 	       btf_id == special_kfunc_list[KF_bpf_arena_free_pages] ||
11635 	       btf_id == special_kfunc_list[KF_bpf_arena_reserve_pages];
11636 }
11637 
11638 static bool is_bpf_stream_kfunc(u32 btf_id)
11639 {
11640 	return btf_id == special_kfunc_list[KF_bpf_stream_vprintk] ||
11641 	       btf_id == special_kfunc_list[KF_bpf_stream_print_stack];
11642 }
11643 
11644 static bool kfunc_spin_allowed(u32 btf_id)
11645 {
11646 	return is_bpf_graph_api_kfunc(btf_id) || is_bpf_iter_num_api_kfunc(btf_id) ||
11647 	       is_bpf_res_spin_lock_kfunc(btf_id) || is_bpf_arena_kfunc(btf_id) ||
11648 	       is_bpf_stream_kfunc(btf_id);
11649 }
11650 
11651 static bool is_sync_callback_calling_kfunc(u32 btf_id)
11652 {
11653 	return is_bpf_rbtree_add_kfunc(btf_id);
11654 }
11655 
11656 static bool is_async_callback_calling_kfunc(u32 btf_id)
11657 {
11658 	return is_bpf_wq_set_callback_kfunc(btf_id) ||
11659 	       is_task_work_add_kfunc(btf_id);
11660 }
11661 
11662 bool bpf_is_throw_kfunc(struct bpf_insn *insn)
11663 {
11664 	return bpf_pseudo_kfunc_call(insn) && insn->off == 0 &&
11665 	       insn->imm == special_kfunc_list[KF_bpf_throw];
11666 }
11667 
11668 static bool is_bpf_wq_set_callback_kfunc(u32 btf_id)
11669 {
11670 	return btf_id == special_kfunc_list[KF_bpf_wq_set_callback];
11671 }
11672 
11673 static bool is_callback_calling_kfunc(u32 btf_id)
11674 {
11675 	return is_sync_callback_calling_kfunc(btf_id) ||
11676 	       is_async_callback_calling_kfunc(btf_id);
11677 }
11678 
11679 static bool is_rbtree_lock_required_kfunc(u32 btf_id)
11680 {
11681 	return is_bpf_rbtree_api_kfunc(btf_id);
11682 }
11683 
11684 static bool check_kfunc_is_graph_root_api(struct bpf_verifier_env *env,
11685 					  enum btf_field_type head_field_type,
11686 					  u32 kfunc_btf_id)
11687 {
11688 	bool ret;
11689 
11690 	switch (head_field_type) {
11691 	case BPF_LIST_HEAD:
11692 		ret = is_bpf_list_api_kfunc(kfunc_btf_id);
11693 		break;
11694 	case BPF_RB_ROOT:
11695 		ret = is_bpf_rbtree_api_kfunc(kfunc_btf_id);
11696 		break;
11697 	default:
11698 		verbose(env, "verifier internal error: unexpected graph root argument type %s\n",
11699 			btf_field_type_name(head_field_type));
11700 		return false;
11701 	}
11702 
11703 	if (!ret)
11704 		verbose(env, "verifier internal error: %s head arg for unknown kfunc\n",
11705 			btf_field_type_name(head_field_type));
11706 	return ret;
11707 }
11708 
11709 static bool check_kfunc_is_graph_node_api(struct bpf_verifier_env *env,
11710 					  enum btf_field_type node_field_type,
11711 					  u32 kfunc_btf_id)
11712 {
11713 	bool ret;
11714 
11715 	switch (node_field_type) {
11716 	case BPF_LIST_NODE:
11717 		ret = is_bpf_list_push_kfunc(kfunc_btf_id) ||
11718 		      kfunc_btf_id == special_kfunc_list[KF_bpf_list_del] ||
11719 		      kfunc_btf_id == special_kfunc_list[KF_bpf_list_is_first] ||
11720 		      kfunc_btf_id == special_kfunc_list[KF_bpf_list_is_last];
11721 		break;
11722 	case BPF_RB_NODE:
11723 		ret = (is_bpf_rbtree_add_kfunc(kfunc_btf_id) ||
11724 		       kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
11725 		       kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_left] ||
11726 		       kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_right]);
11727 		break;
11728 	default:
11729 		verbose(env, "verifier internal error: unexpected graph node argument type %s\n",
11730 			btf_field_type_name(node_field_type));
11731 		return false;
11732 	}
11733 
11734 	if (!ret)
11735 		verbose(env, "verifier internal error: %s node arg for unknown kfunc\n",
11736 			btf_field_type_name(node_field_type));
11737 	return ret;
11738 }
11739 
11740 static int
11741 __process_kf_arg_ptr_to_graph_root(struct bpf_verifier_env *env,
11742 				   struct bpf_reg_state *reg, argno_t argno,
11743 				   struct bpf_kfunc_call_arg_meta *meta,
11744 				   enum btf_field_type head_field_type,
11745 				   struct btf_field **head_field)
11746 {
11747 	const char *head_type_name;
11748 	struct btf_field *field;
11749 	struct btf_record *rec;
11750 	u32 head_off;
11751 
11752 	if (meta->btf != btf_vmlinux) {
11753 		verifier_bug(env, "unexpected btf mismatch in kfunc call");
11754 		return -EFAULT;
11755 	}
11756 
11757 	if (!check_kfunc_is_graph_root_api(env, head_field_type, meta->func_id))
11758 		return -EFAULT;
11759 
11760 	head_type_name = btf_field_type_name(head_field_type);
11761 	if (!tnum_is_const(reg->var_off)) {
11762 		verbose(env,
11763 			"%s doesn't have constant offset. %s has to be at the constant offset\n",
11764 			reg_arg_name(env, argno), head_type_name);
11765 		return -EINVAL;
11766 	}
11767 
11768 	rec = reg_btf_record(reg);
11769 	head_off = reg->var_off.value;
11770 	field = btf_record_find(rec, head_off, head_field_type);
11771 	if (!field) {
11772 		verbose(env, "%s not found at offset=%u\n", head_type_name, head_off);
11773 		return -EINVAL;
11774 	}
11775 
11776 	/* All functions require bpf_list_head to be protected using a bpf_spin_lock */
11777 	if (check_reg_allocation_locked(env, reg)) {
11778 		verbose(env, "bpf_spin_lock at off=%d must be held for %s\n",
11779 			rec->spin_lock_off, head_type_name);
11780 		return -EINVAL;
11781 	}
11782 
11783 	if (*head_field) {
11784 		verifier_bug(env, "repeating %s arg", head_type_name);
11785 		return -EFAULT;
11786 	}
11787 	*head_field = field;
11788 	return 0;
11789 }
11790 
11791 static int process_kf_arg_ptr_to_list_head(struct bpf_verifier_env *env,
11792 					   struct bpf_reg_state *reg, argno_t argno,
11793 					   struct bpf_kfunc_call_arg_meta *meta)
11794 {
11795 	return __process_kf_arg_ptr_to_graph_root(env, reg, argno, meta, BPF_LIST_HEAD,
11796 							  &meta->arg_list_head.field);
11797 }
11798 
11799 static int process_kf_arg_ptr_to_rbtree_root(struct bpf_verifier_env *env,
11800 					     struct bpf_reg_state *reg, argno_t argno,
11801 					     struct bpf_kfunc_call_arg_meta *meta)
11802 {
11803 	return __process_kf_arg_ptr_to_graph_root(env, reg, argno, meta, BPF_RB_ROOT,
11804 							  &meta->arg_rbtree_root.field);
11805 }
11806 
11807 static int
11808 __process_kf_arg_ptr_to_graph_node(struct bpf_verifier_env *env,
11809 				   struct bpf_reg_state *reg, argno_t argno,
11810 				   struct bpf_kfunc_call_arg_meta *meta,
11811 				   enum btf_field_type head_field_type,
11812 				   enum btf_field_type node_field_type,
11813 				   struct btf_field **node_field)
11814 {
11815 	const char *node_type_name;
11816 	const struct btf_type *et, *t;
11817 	struct btf_field *field;
11818 	u32 node_off;
11819 
11820 	if (meta->btf != btf_vmlinux) {
11821 		verifier_bug(env, "unexpected btf mismatch in kfunc call");
11822 		return -EFAULT;
11823 	}
11824 
11825 	if (!check_kfunc_is_graph_node_api(env, node_field_type, meta->func_id))
11826 		return -EFAULT;
11827 
11828 	node_type_name = btf_field_type_name(node_field_type);
11829 	if (!tnum_is_const(reg->var_off)) {
11830 		verbose(env,
11831 			"%s doesn't have constant offset. %s has to be at the constant offset\n",
11832 			reg_arg_name(env, argno), node_type_name);
11833 		return -EINVAL;
11834 	}
11835 
11836 	node_off = reg->var_off.value;
11837 	field = reg_find_field_offset(reg, node_off, node_field_type);
11838 	if (!field) {
11839 		verbose(env, "%s not found at offset=%u\n", node_type_name, node_off);
11840 		return -EINVAL;
11841 	}
11842 
11843 	field = *node_field;
11844 
11845 	et = btf_type_by_id(field->graph_root.btf, field->graph_root.value_btf_id);
11846 	t = btf_type_by_id(reg->btf, reg->btf_id);
11847 	if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, 0, field->graph_root.btf,
11848 				  field->graph_root.value_btf_id, true)) {
11849 		verbose(env, "operation on %s expects arg#1 %s at offset=%d "
11850 			"in struct %s, but arg is at offset=%d in struct %s\n",
11851 			btf_field_type_name(head_field_type),
11852 			btf_field_type_name(node_field_type),
11853 			field->graph_root.node_offset,
11854 			btf_name_by_offset(field->graph_root.btf, et->name_off),
11855 			node_off, btf_name_by_offset(reg->btf, t->name_off));
11856 		return -EINVAL;
11857 	}
11858 	meta->arg_btf = reg->btf;
11859 	meta->arg_btf_id = reg->btf_id;
11860 
11861 	if (node_off != field->graph_root.node_offset) {
11862 		verbose(env, "arg#1 offset=%d, but expected %s at offset=%d in struct %s\n",
11863 			node_off, btf_field_type_name(node_field_type),
11864 			field->graph_root.node_offset,
11865 			btf_name_by_offset(field->graph_root.btf, et->name_off));
11866 		return -EINVAL;
11867 	}
11868 
11869 	return 0;
11870 }
11871 
11872 static int process_kf_arg_ptr_to_list_node(struct bpf_verifier_env *env,
11873 					   struct bpf_reg_state *reg, argno_t argno,
11874 					   struct bpf_kfunc_call_arg_meta *meta)
11875 {
11876 	return __process_kf_arg_ptr_to_graph_node(env, reg, argno, meta,
11877 						  BPF_LIST_HEAD, BPF_LIST_NODE,
11878 						  &meta->arg_list_head.field);
11879 }
11880 
11881 static int process_kf_arg_ptr_to_rbtree_node(struct bpf_verifier_env *env,
11882 					     struct bpf_reg_state *reg, argno_t argno,
11883 					     struct bpf_kfunc_call_arg_meta *meta)
11884 {
11885 	return __process_kf_arg_ptr_to_graph_node(env, reg, argno, meta,
11886 						  BPF_RB_ROOT, BPF_RB_NODE,
11887 						  &meta->arg_rbtree_root.field);
11888 }
11889 
11890 /*
11891  * css_task iter allowlist is needed to avoid deadlocking on css_set_lock.
11892  * LSM hooks and iters (both sleepable and non-sleepable) are safe.
11893  * Any sleepable progs are also safe since bpf_check_attach_target() ensures
11894  * they can only be attached to some specific hook points.
11895  */
11896 static bool check_css_task_iter_allowlist(struct bpf_verifier_env *env)
11897 {
11898 	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
11899 
11900 	switch (prog_type) {
11901 	case BPF_PROG_TYPE_LSM:
11902 		return true;
11903 	case BPF_PROG_TYPE_TRACING:
11904 		if (env->prog->expected_attach_type == BPF_TRACE_ITER)
11905 			return true;
11906 		fallthrough;
11907 	default:
11908 		return in_sleepable(env);
11909 	}
11910 }
11911 
11912 static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_arg_meta *meta,
11913 			    int insn_idx)
11914 {
11915 	const char *func_name = meta->func_name, *ref_tname;
11916 	struct bpf_func_state *caller = cur_func(env);
11917 	struct bpf_reg_state *regs = cur_regs(env);
11918 	const struct btf *btf = meta->btf;
11919 	const struct btf_param *args;
11920 	struct btf_record *rec;
11921 	u32 i, nargs;
11922 	int ret;
11923 
11924 	args = (const struct btf_param *)(meta->func_proto + 1);
11925 	nargs = btf_type_vlen(meta->func_proto);
11926 	if (nargs > MAX_BPF_FUNC_ARGS) {
11927 		verbose(env, "Function %s has %d > %d args\n", func_name, nargs,
11928 			MAX_BPF_FUNC_ARGS);
11929 		return -EINVAL;
11930 	}
11931 	if (nargs > MAX_BPF_FUNC_REG_ARGS && !bpf_jit_supports_stack_args()) {
11932 		verbose(env, "JIT does not support kfunc %s() with %d args\n",
11933 			func_name, nargs);
11934 		return -ENOTSUPP;
11935 	}
11936 
11937 	ret = check_outgoing_stack_args(env, caller, nargs);
11938 	if (ret)
11939 		return ret;
11940 
11941 	/* Check that BTF function arguments match actual types that the
11942 	 * verifier sees.
11943 	 */
11944 	for (i = 0; i < nargs; i++) {
11945 		struct bpf_reg_state *reg = get_func_arg_reg(caller, regs, i);
11946 		const struct btf_type *t, *ref_t, *resolve_ret;
11947 		enum bpf_arg_type arg_type = ARG_DONTCARE;
11948 		argno_t argno = argno_from_arg(i + 1);
11949 		int regno = reg_from_argno(argno);
11950 		u32 ref_id, type_size;
11951 		bool is_ret_buf_sz = false;
11952 		int kf_arg_type;
11953 
11954 		if (is_kfunc_arg_prog_aux(btf, &args[i])) {
11955 			/* Reject repeated use bpf_prog_aux */
11956 			if (meta->arg_prog) {
11957 				verifier_bug(env, "Only 1 prog->aux argument supported per-kfunc");
11958 				return -EFAULT;
11959 			}
11960 			if (regno < 0) {
11961 				verbose(env, "%s prog->aux cannot be a stack argument\n",
11962 					reg_arg_name(env, argno));
11963 				return -EINVAL;
11964 			}
11965 			meta->arg_prog = true;
11966 			cur_aux(env)->arg_prog = regno;
11967 			continue;
11968 		}
11969 
11970 		if (is_kfunc_arg_ignore(btf, &args[i]) || is_kfunc_arg_implicit(meta, i))
11971 			continue;
11972 
11973 		t = btf_type_skip_modifiers(btf, args[i].type, NULL);
11974 
11975 		if (btf_type_is_scalar(t)) {
11976 			if (reg->type != SCALAR_VALUE) {
11977 				verbose(env, "%s is not a scalar\n", reg_arg_name(env, argno));
11978 				return -EINVAL;
11979 			}
11980 
11981 			if (is_kfunc_arg_constant(meta->btf, &args[i])) {
11982 				if (meta->arg_constant.found) {
11983 					verifier_bug(env, "only one constant argument permitted");
11984 					return -EFAULT;
11985 				}
11986 				if (!tnum_is_const(reg->var_off)) {
11987 					verbose(env, "%s must be a known constant\n",
11988 						reg_arg_name(env, argno));
11989 					return -EINVAL;
11990 				}
11991 				if (regno >= 0)
11992 					ret = mark_chain_precision(env, regno);
11993 				else
11994 					ret = mark_stack_arg_precision(env, i);
11995 				if (ret < 0)
11996 					return ret;
11997 				meta->arg_constant.found = true;
11998 				meta->arg_constant.value = reg->var_off.value;
11999 			} else if (is_kfunc_arg_scalar_with_name(btf, &args[i], "rdonly_buf_size")) {
12000 				meta->r0_rdonly = true;
12001 				is_ret_buf_sz = true;
12002 			} else if (is_kfunc_arg_scalar_with_name(btf, &args[i], "rdwr_buf_size")) {
12003 				is_ret_buf_sz = true;
12004 			}
12005 
12006 			if (is_ret_buf_sz) {
12007 				if (meta->r0_size) {
12008 					verbose(env, "2 or more rdonly/rdwr_buf_size parameters for kfunc");
12009 					return -EINVAL;
12010 				}
12011 
12012 				if (!tnum_is_const(reg->var_off)) {
12013 					verbose(env, "%s is not a const\n",
12014 						reg_arg_name(env, argno));
12015 					return -EINVAL;
12016 				}
12017 
12018 				meta->r0_size = reg->var_off.value;
12019 				if (regno >= 0)
12020 					ret = mark_chain_precision(env, regno);
12021 				else
12022 					ret = mark_stack_arg_precision(env, i);
12023 				if (ret)
12024 					return ret;
12025 			}
12026 			continue;
12027 		}
12028 
12029 		if (!btf_type_is_ptr(t)) {
12030 			verbose(env, "Unrecognized %s type %s\n",
12031 				reg_arg_name(env, argno), btf_type_str(t));
12032 			return -EINVAL;
12033 		}
12034 
12035 		if ((bpf_register_is_null(reg) || type_may_be_null(reg->type)) &&
12036 		    !is_kfunc_arg_nullable(meta->btf, &args[i])) {
12037 			verbose(env, "Possibly NULL pointer passed to trusted %s\n",
12038 				reg_arg_name(env, argno));
12039 			return -EACCES;
12040 		}
12041 
12042 		if (regno == meta->release_regno && !is_kfunc_arg_dynptr(meta->btf, &args[i]) &&
12043 		    !reg_is_referenced(env, reg) && !bpf_register_is_null(reg)) {
12044 			verbose(env, "release kfunc %s expects referenced PTR_TO_BTF_ID passed to %s\n",
12045 				func_name, reg_arg_name(env, argno));
12046 			return -EINVAL;
12047 		}
12048 
12049 		if (reg_is_referenced(env, reg))
12050 			update_ref_obj(&meta->ref_obj, reg);
12051 
12052 		ref_t = btf_type_skip_modifiers(btf, t->type, &ref_id);
12053 		ref_tname = btf_name_by_offset(btf, ref_t->name_off);
12054 
12055 		kf_arg_type = get_kfunc_ptr_arg_type(env, caller, regs, meta, t, ref_t, ref_tname,
12056 						     args, i, nargs, argno, reg);
12057 		if (kf_arg_type < 0)
12058 			return kf_arg_type;
12059 
12060 		switch (kf_arg_type) {
12061 		case KF_ARG_PTR_TO_NULL:
12062 			continue;
12063 		case KF_ARG_PTR_TO_MAP:
12064 			if (!reg->map_ptr) {
12065 				verbose(env, "pointer in %s isn't map pointer\n",
12066 					reg_arg_name(env, argno));
12067 				return -EINVAL;
12068 			}
12069 			if (meta->map.ptr && (reg->map_ptr->record->wq_off >= 0 ||
12070 					      reg->map_ptr->record->task_work_off >= 0)) {
12071 				/* Use map_uid (which is unique id of inner map) to reject:
12072 				 * inner_map1 = bpf_map_lookup_elem(outer_map, key1)
12073 				 * inner_map2 = bpf_map_lookup_elem(outer_map, key2)
12074 				 * if (inner_map1 && inner_map2) {
12075 				 *     wq = bpf_map_lookup_elem(inner_map1);
12076 				 *     if (wq)
12077 				 *         // mismatch would have been allowed
12078 				 *         bpf_wq_init(wq, inner_map2);
12079 				 * }
12080 				 *
12081 				 * Comparing map_ptr is enough to distinguish normal and outer maps.
12082 				 */
12083 				if (meta->map.ptr != reg->map_ptr ||
12084 				    meta->map.uid != reg->map_uid) {
12085 					if (reg->map_ptr->record->task_work_off >= 0) {
12086 						verbose(env,
12087 							"bpf_task_work pointer in R2 map_uid=%d doesn't match map pointer in R3 map_uid=%d\n",
12088 							meta->map.uid, reg->map_uid);
12089 						return -EINVAL;
12090 					}
12091 					verbose(env,
12092 						"workqueue pointer in R1 map_uid=%d doesn't match map pointer in R2 map_uid=%d\n",
12093 						meta->map.uid, reg->map_uid);
12094 					return -EINVAL;
12095 				}
12096 			}
12097 			meta->map.ptr = reg->map_ptr;
12098 			meta->map.uid = reg->map_uid;
12099 			fallthrough;
12100 		case KF_ARG_PTR_TO_ALLOC_BTF_ID:
12101 		case KF_ARG_PTR_TO_BTF_ID:
12102 			if (!is_trusted_reg(env, reg)) {
12103 				if (!is_kfunc_rcu(meta)) {
12104 					verbose(env, "%s must be referenced or trusted\n",
12105 						reg_arg_name(env, argno));
12106 					return -EINVAL;
12107 				}
12108 				if (!is_rcu_reg(reg)) {
12109 					verbose(env, "%s must be a rcu pointer\n",
12110 						reg_arg_name(env, argno));
12111 					return -EINVAL;
12112 				}
12113 			}
12114 			fallthrough;
12115 		case KF_ARG_PTR_TO_ITER:
12116 		case KF_ARG_PTR_TO_LIST_HEAD:
12117 		case KF_ARG_PTR_TO_LIST_NODE:
12118 		case KF_ARG_PTR_TO_RB_ROOT:
12119 		case KF_ARG_PTR_TO_RB_NODE:
12120 		case KF_ARG_PTR_TO_MEM:
12121 		case KF_ARG_PTR_TO_MEM_SIZE:
12122 		case KF_ARG_PTR_TO_CALLBACK:
12123 		case KF_ARG_PTR_TO_REFCOUNTED_KPTR:
12124 		case KF_ARG_PTR_TO_CONST_STR:
12125 		case KF_ARG_PTR_TO_WORKQUEUE:
12126 		case KF_ARG_PTR_TO_TIMER:
12127 		case KF_ARG_PTR_TO_TASK_WORK:
12128 		case KF_ARG_PTR_TO_IRQ_FLAG:
12129 		case KF_ARG_PTR_TO_RES_SPIN_LOCK:
12130 			break;
12131 		case KF_ARG_PTR_TO_DYNPTR:
12132 			arg_type = ARG_PTR_TO_DYNPTR;
12133 			break;
12134 		case KF_ARG_PTR_TO_CTX:
12135 			arg_type = ARG_PTR_TO_CTX;
12136 			break;
12137 		default:
12138 			verifier_bug(env, "unknown kfunc arg type %d", kf_arg_type);
12139 			return -EFAULT;
12140 		}
12141 
12142 		if (regno == meta->release_regno)
12143 			arg_type |= OBJ_RELEASE;
12144 		ret = check_func_arg_reg_off(env, reg, argno, arg_type);
12145 		if (ret < 0)
12146 			return ret;
12147 
12148 		switch (kf_arg_type) {
12149 		case KF_ARG_PTR_TO_CTX:
12150 			if (reg->type != PTR_TO_CTX) {
12151 				verbose(env, "%s expected pointer to ctx, but got %s\n",
12152 					reg_arg_name(env, argno), reg_type_str(env, reg->type));
12153 				return -EINVAL;
12154 			}
12155 
12156 			if (meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx]) {
12157 				ret = get_kern_ctx_btf_id(&env->log, resolve_prog_type(env->prog));
12158 				if (ret < 0)
12159 					return -EINVAL;
12160 				meta->ret_btf_id  = ret;
12161 			}
12162 			break;
12163 		case KF_ARG_PTR_TO_ALLOC_BTF_ID:
12164 			if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC)) {
12165 				if (!is_bpf_obj_drop_kfunc(meta->func_id)) {
12166 					verbose(env, "%s expected for bpf_obj_drop()\n",
12167 						reg_arg_name(env, argno));
12168 					return -EINVAL;
12169 				}
12170 			} else if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC | MEM_PERCPU)) {
12171 				if (!is_bpf_percpu_obj_drop_kfunc(meta->func_id)) {
12172 					verbose(env, "%s expected for bpf_percpu_obj_drop()\n",
12173 						reg_arg_name(env, argno));
12174 					return -EINVAL;
12175 				}
12176 			} else {
12177 				verbose(env, "%s expected pointer to allocated object\n",
12178 					reg_arg_name(env, argno));
12179 				return -EINVAL;
12180 			}
12181 			if (!reg_is_referenced(env, reg)) {
12182 				verbose(env, "allocated object must be referenced\n");
12183 				return -EINVAL;
12184 			}
12185 			if (meta->btf == btf_vmlinux) {
12186 				meta->arg_btf = reg->btf;
12187 				meta->arg_btf_id = reg->btf_id;
12188 			}
12189 			break;
12190 		case KF_ARG_PTR_TO_DYNPTR:
12191 		{
12192 			enum bpf_arg_type dynptr_arg_type = ARG_PTR_TO_DYNPTR;
12193 
12194 			if (is_kfunc_arg_uninit(btf, &args[i]))
12195 				dynptr_arg_type |= MEM_UNINIT;
12196 
12197 			if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_skb]) {
12198 				dynptr_arg_type |= DYNPTR_TYPE_SKB;
12199 			} else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_xdp]) {
12200 				dynptr_arg_type |= DYNPTR_TYPE_XDP;
12201 			} else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_skb_meta]) {
12202 				dynptr_arg_type |= DYNPTR_TYPE_SKB_META;
12203 			} else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_file]) {
12204 				dynptr_arg_type |= DYNPTR_TYPE_FILE;
12205 			} else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_file_discard]) {
12206 				dynptr_arg_type |= DYNPTR_TYPE_FILE | OBJ_RELEASE;
12207 			} else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_clone] &&
12208 				   (dynptr_arg_type & MEM_UNINIT)) {
12209 				enum bpf_dynptr_type parent_type = meta->dynptr.type;
12210 
12211 				if (parent_type == BPF_DYNPTR_TYPE_INVALID) {
12212 					verifier_bug(env, "no dynptr type for parent of clone");
12213 					return -EFAULT;
12214 				}
12215 
12216 				dynptr_arg_type |= (unsigned int)get_dynptr_type_flag(parent_type);
12217 			}
12218 
12219 			ret = process_dynptr_func(env, reg, argno, insn_idx, dynptr_arg_type,
12220 						  &meta->ref_obj, &meta->dynptr);
12221 			if (ret < 0)
12222 				return ret;
12223 			break;
12224 		}
12225 		case KF_ARG_PTR_TO_ITER:
12226 			if (meta->func_id == special_kfunc_list[KF_bpf_iter_css_task_new]) {
12227 				if (!check_css_task_iter_allowlist(env)) {
12228 					verbose(env, "css_task_iter is only allowed in bpf_lsm, bpf_iter and sleepable progs\n");
12229 					return -EINVAL;
12230 				}
12231 			}
12232 			ret = process_iter_arg(env, reg, argno, insn_idx, meta);
12233 			if (ret < 0)
12234 				return ret;
12235 			break;
12236 		case KF_ARG_PTR_TO_LIST_HEAD:
12237 			if (reg->type != PTR_TO_MAP_VALUE &&
12238 			    reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
12239 				verbose(env, "%s expected pointer to map value or allocated object\n",
12240 					reg_arg_name(env, argno));
12241 				return -EINVAL;
12242 			}
12243 			if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC) &&
12244 			    !reg_is_referenced(env, reg)) {
12245 				verbose(env, "allocated object must be referenced\n");
12246 				return -EINVAL;
12247 			}
12248 			ret = process_kf_arg_ptr_to_list_head(env, reg, argno, meta);
12249 			if (ret < 0)
12250 				return ret;
12251 			break;
12252 		case KF_ARG_PTR_TO_RB_ROOT:
12253 			if (reg->type != PTR_TO_MAP_VALUE &&
12254 			    reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
12255 				verbose(env, "%s expected pointer to map value or allocated object\n",
12256 					reg_arg_name(env, argno));
12257 				return -EINVAL;
12258 			}
12259 			if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC) &&
12260 			    !reg_is_referenced(env, reg)) {
12261 				verbose(env, "allocated object must be referenced\n");
12262 				return -EINVAL;
12263 			}
12264 			ret = process_kf_arg_ptr_to_rbtree_root(env, reg, argno, meta);
12265 			if (ret < 0)
12266 				return ret;
12267 			break;
12268 		case KF_ARG_PTR_TO_LIST_NODE:
12269 			if (is_kfunc_arg_nonown_allowed(btf, &args[i]) &&
12270 			    type_is_non_owning_ref(reg->type) && !reg_is_referenced(env, reg)) {
12271 				/* Allow bpf_list_front/back return value for
12272 				 * __nonown_allowed list-node arguments.
12273 				 */
12274 				goto check_ok;
12275 			}
12276 			if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
12277 				verbose(env, "%s expected pointer to allocated object\n",
12278 					reg_arg_name(env, argno));
12279 				return -EINVAL;
12280 			}
12281 			if (!reg_is_referenced(env, reg)) {
12282 				verbose(env, "allocated object must be referenced\n");
12283 				return -EINVAL;
12284 			}
12285 check_ok:
12286 			ret = process_kf_arg_ptr_to_list_node(env, reg, argno, meta);
12287 			if (ret < 0)
12288 				return ret;
12289 			break;
12290 		case KF_ARG_PTR_TO_RB_NODE:
12291 			if (is_bpf_rbtree_add_kfunc(meta->func_id)) {
12292 				if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
12293 					verbose(env, "%s expected pointer to allocated object\n",
12294 						reg_arg_name(env, argno));
12295 					return -EINVAL;
12296 				}
12297 				if (!reg_is_referenced(env, reg)) {
12298 					verbose(env, "allocated object must be referenced\n");
12299 					return -EINVAL;
12300 				}
12301 			} else {
12302 				if (!type_is_non_owning_ref(reg->type) &&
12303 				    !reg_is_referenced(env, reg)) {
12304 					verbose(env, "%s can only take non-owning or refcounted bpf_rb_node pointer\n", func_name);
12305 					return -EINVAL;
12306 				}
12307 				if (in_rbtree_lock_required_cb(env)) {
12308 					verbose(env, "%s not allowed in rbtree cb\n", func_name);
12309 					return -EINVAL;
12310 				}
12311 			}
12312 
12313 			ret = process_kf_arg_ptr_to_rbtree_node(env, reg, argno, meta);
12314 			if (ret < 0)
12315 				return ret;
12316 			break;
12317 		case KF_ARG_PTR_TO_MAP:
12318 			/* If argument has '__map' suffix expect 'struct bpf_map *' */
12319 			ref_id = *reg2btf_ids[CONST_PTR_TO_MAP];
12320 			ref_t = btf_type_by_id(btf_vmlinux, ref_id);
12321 			ref_tname = btf_name_by_offset(btf, ref_t->name_off);
12322 			fallthrough;
12323 		case KF_ARG_PTR_TO_BTF_ID:
12324 			/* Only base_type is checked, further checks are done here */
12325 			if ((base_type(reg->type) != PTR_TO_BTF_ID ||
12326 			     (bpf_type_has_unsafe_modifiers(reg->type) && !is_rcu_reg(reg))) &&
12327 			    !reg2btf_ids[base_type(reg->type)]) {
12328 				verbose(env, "%s is %s ", reg_arg_name(env, argno),
12329 					reg_type_str(env, reg->type));
12330 				verbose(env, "expected %s or socket\n",
12331 					reg_type_str(env, base_type(reg->type) |
12332 							  (type_flag(reg->type) & BPF_REG_TRUSTED_MODIFIERS)));
12333 				return -EINVAL;
12334 			}
12335 			ret = process_kf_arg_ptr_to_btf_id(env, reg, ref_t, ref_tname, ref_id, meta, i, argno);
12336 			if (ret < 0)
12337 				return ret;
12338 			break;
12339 		case KF_ARG_PTR_TO_MEM:
12340 			resolve_ret = btf_resolve_size(btf, ref_t, &type_size);
12341 			if (IS_ERR(resolve_ret)) {
12342 				verbose(env, "%s reference type('%s %s') size cannot be determined: %ld\n",
12343 					reg_arg_name(env, argno), btf_type_str(ref_t),
12344 					ref_tname, PTR_ERR(resolve_ret));
12345 				return -EINVAL;
12346 			}
12347 			ret = check_mem_reg(env, reg, argno, type_size);
12348 			if (ret < 0)
12349 				return ret;
12350 			break;
12351 		case KF_ARG_PTR_TO_MEM_SIZE:
12352 		{
12353 			struct bpf_reg_state *buff_reg = reg;
12354 			const struct btf_param *buff_arg = &args[i];
12355 			struct bpf_reg_state *size_reg = get_func_arg_reg(caller, regs, i + 1);
12356 			const struct btf_param *size_arg = &args[i + 1];
12357 			argno_t next_argno = argno_from_arg(i + 2);
12358 
12359 			if (!bpf_register_is_null(buff_reg) || !is_kfunc_arg_nullable(meta->btf, buff_arg)) {
12360 				ret = check_kfunc_mem_size_reg(env, buff_reg, size_reg,
12361 							       argno, next_argno);
12362 				if (ret < 0) {
12363 					verbose(env, "%s and ", reg_arg_name(env, argno));
12364 					verbose(env, "%s memory, len pair leads to invalid memory access\n",
12365 						reg_arg_name(env, next_argno));
12366 					return ret;
12367 				}
12368 			}
12369 
12370 			if (is_kfunc_arg_const_mem_size(meta->btf, size_arg, size_reg)) {
12371 				if (meta->arg_constant.found) {
12372 					verifier_bug(env, "only one constant argument permitted");
12373 					return -EFAULT;
12374 				}
12375 				if (!tnum_is_const(size_reg->var_off)) {
12376 					verbose(env, "%s must be a known constant\n",
12377 						reg_arg_name(env, next_argno));
12378 					return -EINVAL;
12379 				}
12380 				meta->arg_constant.found = true;
12381 				meta->arg_constant.value = size_reg->var_off.value;
12382 			}
12383 
12384 			/* Skip next '__sz' or '__szk' argument */
12385 			i++;
12386 			break;
12387 		}
12388 		case KF_ARG_PTR_TO_CALLBACK:
12389 			if (reg->type != PTR_TO_FUNC) {
12390 				verbose(env, "%s expected pointer to func\n", reg_arg_name(env, argno));
12391 				return -EINVAL;
12392 			}
12393 			meta->subprogno = reg->subprogno;
12394 			break;
12395 		case KF_ARG_PTR_TO_REFCOUNTED_KPTR:
12396 			if (!type_is_ptr_alloc_obj(reg->type)) {
12397 				verbose(env, "%s is neither owning or non-owning ref\n",
12398 					reg_arg_name(env, argno));
12399 				return -EINVAL;
12400 			}
12401 			if (!type_is_non_owning_ref(reg->type))
12402 				meta->arg_owning_ref = true;
12403 
12404 			rec = reg_btf_record(reg);
12405 			if (!rec) {
12406 				verifier_bug(env, "Couldn't find btf_record");
12407 				return -EFAULT;
12408 			}
12409 
12410 			if (rec->refcount_off < 0) {
12411 				verbose(env, "%s doesn't point to a type with bpf_refcount field\n",
12412 					reg_arg_name(env, argno));
12413 				return -EINVAL;
12414 			}
12415 
12416 			meta->arg_btf = reg->btf;
12417 			meta->arg_btf_id = reg->btf_id;
12418 			break;
12419 		case KF_ARG_PTR_TO_CONST_STR:
12420 			if (reg->type != PTR_TO_MAP_VALUE) {
12421 				verbose(env, "%s doesn't point to a const string\n",
12422 					reg_arg_name(env, argno));
12423 				return -EINVAL;
12424 			}
12425 			ret = check_arg_const_str(env, reg, argno);
12426 			if (ret)
12427 				return ret;
12428 			break;
12429 		case KF_ARG_PTR_TO_WORKQUEUE:
12430 			if (reg->type != PTR_TO_MAP_VALUE) {
12431 				verbose(env, "%s doesn't point to a map value\n",
12432 					reg_arg_name(env, argno));
12433 				return -EINVAL;
12434 			}
12435 			ret = check_map_field_pointer(env, reg, argno, BPF_WORKQUEUE, &meta->map);
12436 			if (ret < 0)
12437 				return ret;
12438 			break;
12439 		case KF_ARG_PTR_TO_TIMER:
12440 			if (reg->type != PTR_TO_MAP_VALUE) {
12441 				verbose(env, "%s doesn't point to a map value\n",
12442 					reg_arg_name(env, argno));
12443 				return -EINVAL;
12444 			}
12445 			ret = process_timer_kfunc(env, reg, argno, meta);
12446 			if (ret < 0)
12447 				return ret;
12448 			break;
12449 		case KF_ARG_PTR_TO_TASK_WORK:
12450 			if (reg->type != PTR_TO_MAP_VALUE) {
12451 				verbose(env, "%s doesn't point to a map value\n",
12452 					reg_arg_name(env, argno));
12453 				return -EINVAL;
12454 			}
12455 			ret = check_map_field_pointer(env, reg, argno, BPF_TASK_WORK, &meta->map);
12456 			if (ret < 0)
12457 				return ret;
12458 			break;
12459 		case KF_ARG_PTR_TO_IRQ_FLAG:
12460 			if (reg->type != PTR_TO_STACK) {
12461 				verbose(env, "%s doesn't point to an irq flag on stack\n",
12462 					reg_arg_name(env, argno));
12463 				return -EINVAL;
12464 			}
12465 			ret = process_irq_flag(env, reg, argno, meta);
12466 			if (ret < 0)
12467 				return ret;
12468 			break;
12469 		case KF_ARG_PTR_TO_RES_SPIN_LOCK:
12470 		{
12471 			int flags = PROCESS_RES_LOCK;
12472 
12473 			if (reg->type != PTR_TO_MAP_VALUE && reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
12474 				verbose(env, "%s doesn't point to map value or allocated object\n",
12475 					reg_arg_name(env, argno));
12476 				return -EINVAL;
12477 			}
12478 
12479 			if (!is_bpf_res_spin_lock_kfunc(meta->func_id))
12480 				return -EFAULT;
12481 			if (meta->func_id == special_kfunc_list[KF_bpf_res_spin_lock] ||
12482 			    meta->func_id == special_kfunc_list[KF_bpf_res_spin_lock_irqsave])
12483 				flags |= PROCESS_SPIN_LOCK;
12484 			if (meta->func_id == special_kfunc_list[KF_bpf_res_spin_lock_irqsave] ||
12485 			    meta->func_id == special_kfunc_list[KF_bpf_res_spin_unlock_irqrestore])
12486 				flags |= PROCESS_LOCK_IRQ;
12487 			ret = process_spin_lock(env, reg, argno, flags);
12488 			if (ret < 0)
12489 				return ret;
12490 			break;
12491 		}
12492 		}
12493 	}
12494 
12495 	return 0;
12496 }
12497 
12498 int bpf_fetch_kfunc_arg_meta(struct bpf_verifier_env *env,
12499 			     s32 func_id,
12500 			     s16 offset,
12501 			     struct bpf_kfunc_call_arg_meta *meta)
12502 {
12503 	struct bpf_kfunc_meta kfunc;
12504 	int err;
12505 
12506 	err = fetch_kfunc_meta(env, func_id, offset, &kfunc);
12507 	if (err)
12508 		return err;
12509 
12510 	memset(meta, 0, sizeof(*meta));
12511 	meta->btf = kfunc.btf;
12512 	meta->func_id = kfunc.id;
12513 	meta->func_proto = kfunc.proto;
12514 	meta->func_name = kfunc.name;
12515 
12516 	if (!kfunc.flags || !btf_kfunc_is_allowed(kfunc.btf, kfunc.id, env->prog))
12517 		return -EACCES;
12518 
12519 	meta->kfunc_flags = *kfunc.flags;
12520 
12521 	/* Only support release referenced argument passed by register */
12522 	if (is_kfunc_release(meta))
12523 		meta->release_regno = BPF_REG_1;
12524 
12525 	return 0;
12526 }
12527 
12528 /*
12529  * Determine how many bytes a helper accesses through a stack pointer at
12530  * argument position @arg (0-based, corresponding to R1-R5).
12531  *
12532  * Returns:
12533  *   > 0   known read access size in bytes
12534  *     0   doesn't read anything directly
12535  * S64_MIN unknown
12536  *   < 0   known write access of (-return) bytes
12537  */
12538 s64 bpf_helper_stack_access_bytes(struct bpf_verifier_env *env, struct bpf_insn *insn,
12539 				  int arg, int insn_idx)
12540 {
12541 	struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
12542 	const struct bpf_func_proto *fn;
12543 	enum bpf_arg_type at;
12544 	s64 size;
12545 
12546 	if (bpf_get_helper_proto(env, insn->imm, &fn) < 0)
12547 		return S64_MIN;
12548 
12549 	at = fn->arg_type[arg];
12550 
12551 	switch (base_type(at)) {
12552 	case ARG_PTR_TO_MAP_KEY:
12553 	case ARG_PTR_TO_MAP_VALUE: {
12554 		bool is_key = base_type(at) == ARG_PTR_TO_MAP_KEY;
12555 		u64 val;
12556 		int i, map_reg;
12557 
12558 		for (i = 0; i < arg; i++) {
12559 			if (base_type(fn->arg_type[i]) == ARG_CONST_MAP_PTR)
12560 				break;
12561 		}
12562 		if (i >= arg)
12563 			goto scan_all_maps;
12564 
12565 		map_reg = BPF_REG_1 + i;
12566 
12567 		if (!(aux->const_reg_map_mask & BIT(map_reg)))
12568 			goto scan_all_maps;
12569 
12570 		i = aux->const_reg_vals[map_reg];
12571 		if (i < env->used_map_cnt) {
12572 			size = is_key ? env->used_maps[i]->key_size
12573 				      : env->used_maps[i]->value_size;
12574 			goto out;
12575 		}
12576 scan_all_maps:
12577 		/*
12578 		 * Map pointer is not known at this call site (e.g. different
12579 		 * maps on merged paths).  Conservatively return the largest
12580 		 * key_size or value_size across all maps used by the program.
12581 		 */
12582 		val = 0;
12583 		for (i = 0; i < env->used_map_cnt; i++) {
12584 			struct bpf_map *map = env->used_maps[i];
12585 			u32 sz = is_key ? map->key_size : map->value_size;
12586 
12587 			if (sz > val)
12588 				val = sz;
12589 			if (map->inner_map_meta) {
12590 				sz = is_key ? map->inner_map_meta->key_size
12591 					    : map->inner_map_meta->value_size;
12592 				if (sz > val)
12593 					val = sz;
12594 			}
12595 		}
12596 		if (!val)
12597 			return S64_MIN;
12598 		size = val;
12599 		goto out;
12600 	}
12601 	case ARG_PTR_TO_MEM:
12602 		if (at & MEM_FIXED_SIZE) {
12603 			size = fn->arg_size[arg];
12604 			goto out;
12605 		}
12606 		if (arg + 1 < ARRAY_SIZE(fn->arg_type) &&
12607 		    arg_type_is_mem_size(fn->arg_type[arg + 1])) {
12608 			int size_reg = BPF_REG_1 + arg + 1;
12609 
12610 			if (aux->const_reg_mask & BIT(size_reg)) {
12611 				size = (s64)aux->const_reg_vals[size_reg];
12612 				goto out;
12613 			}
12614 			/*
12615 			 * Size arg is const on each path but differs across merged
12616 			 * paths. MAX_BPF_STACK is a safe upper bound for reads.
12617 			 */
12618 			if (at & MEM_UNINIT)
12619 				return 0;
12620 			return MAX_BPF_STACK;
12621 		}
12622 		return S64_MIN;
12623 	case ARG_PTR_TO_DYNPTR:
12624 		size = BPF_DYNPTR_SIZE;
12625 		break;
12626 	case ARG_PTR_TO_STACK:
12627 		/*
12628 		 * Only used by bpf_calls_callback() helpers. The helper itself
12629 		 * doesn't access stack. The callback subprog does and it's
12630 		 * analyzed separately.
12631 		 */
12632 		return 0;
12633 	default:
12634 		return S64_MIN;
12635 	}
12636 out:
12637 	/*
12638 	 * MEM_UNINIT args are write-only: the helper initializes the
12639 	 * buffer without reading it.
12640 	 */
12641 	if (at & MEM_UNINIT)
12642 		return -size;
12643 	return size;
12644 }
12645 
12646 /*
12647  * Determine how many bytes a kfunc accesses through a stack pointer at
12648  * argument position @arg (0-based, corresponding to R1-R5).
12649  *
12650  * Returns:
12651  *   > 0      known read access size in bytes
12652  *     0      doesn't access memory through that argument (ex: not a pointer)
12653  *   S64_MIN  unknown
12654  *   < 0      known write access of (-return) bytes
12655  */
12656 s64 bpf_kfunc_stack_access_bytes(struct bpf_verifier_env *env, struct bpf_insn *insn,
12657 				 int arg, int insn_idx)
12658 {
12659 	struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
12660 	struct bpf_kfunc_call_arg_meta meta;
12661 	const struct btf_param *args;
12662 	const struct btf_type *t, *ref_t;
12663 	const struct btf *btf;
12664 	u32 nargs, type_size;
12665 	s64 size;
12666 
12667 	if (bpf_fetch_kfunc_arg_meta(env, insn->imm, insn->off, &meta) < 0)
12668 		return S64_MIN;
12669 
12670 	btf = meta.btf;
12671 	args = btf_params(meta.func_proto);
12672 	nargs = btf_type_vlen(meta.func_proto);
12673 	if (arg >= nargs)
12674 		return 0;
12675 
12676 	t = btf_type_skip_modifiers(btf, args[arg].type, NULL);
12677 	if (!btf_type_is_ptr(t))
12678 		return 0;
12679 
12680 	/* dynptr: fixed 16-byte on-stack representation */
12681 	if (is_kfunc_arg_dynptr(btf, &args[arg])) {
12682 		size = BPF_DYNPTR_SIZE;
12683 		goto out;
12684 	}
12685 
12686 	/* ptr + __sz/__szk pair: size is in the next register */
12687 	if (arg + 1 < nargs &&
12688 	    (btf_param_match_suffix(btf, &args[arg + 1], "__sz") ||
12689 	     btf_param_match_suffix(btf, &args[arg + 1], "__szk"))) {
12690 		int size_reg = BPF_REG_1 + arg + 1;
12691 
12692 		if (aux->const_reg_mask & BIT(size_reg)) {
12693 			size = (s64)aux->const_reg_vals[size_reg];
12694 			goto out;
12695 		}
12696 		return MAX_BPF_STACK;
12697 	}
12698 
12699 	/* fixed-size pointed-to type: resolve via BTF */
12700 	ref_t = btf_type_skip_modifiers(btf, t->type, NULL);
12701 	if (!IS_ERR(btf_resolve_size(btf, ref_t, &type_size))) {
12702 		size = type_size;
12703 		goto out;
12704 	}
12705 
12706 	return S64_MIN;
12707 out:
12708 	/* KF_ITER_NEW kfuncs initialize the iterator state at arg 0 */
12709 	if (arg == 0 && meta.kfunc_flags & KF_ITER_NEW)
12710 		return -size;
12711 	if (is_kfunc_arg_uninit(btf, &args[arg]))
12712 		return -size;
12713 	return size;
12714 }
12715 
12716 /* check special kfuncs and return:
12717  *  1  - not fall-through to 'else' branch, continue verification
12718  *  0  - fall-through to 'else' branch
12719  * < 0 - not fall-through to 'else' branch, return error
12720  */
12721 static int check_special_kfunc(struct bpf_verifier_env *env, struct bpf_kfunc_call_arg_meta *meta,
12722 			       struct bpf_reg_state *regs, struct bpf_insn_aux_data *insn_aux,
12723 			       const struct btf_type *ptr_type, struct btf *desc_btf)
12724 {
12725 	const struct btf_type *ret_t;
12726 	int err = 0;
12727 
12728 	if (meta->btf != btf_vmlinux)
12729 		return 0;
12730 
12731 	if (is_bpf_obj_new_kfunc(meta->func_id) || is_bpf_percpu_obj_new_kfunc(meta->func_id)) {
12732 		struct btf_struct_meta *struct_meta;
12733 		struct btf *ret_btf;
12734 		u32 ret_btf_id;
12735 
12736 		if (is_bpf_obj_new_kfunc(meta->func_id) && !bpf_global_ma_set)
12737 			return -ENOMEM;
12738 
12739 		if (((u64)(u32)meta->arg_constant.value) != meta->arg_constant.value) {
12740 			verbose(env, "local type ID argument must be in range [0, U32_MAX]\n");
12741 			return -EINVAL;
12742 		}
12743 
12744 		ret_btf = env->prog->aux->btf;
12745 		ret_btf_id = meta->arg_constant.value;
12746 
12747 		/* This may be NULL due to user not supplying a BTF */
12748 		if (!ret_btf) {
12749 			verbose(env, "bpf_obj_new/bpf_percpu_obj_new requires prog BTF\n");
12750 			return -EINVAL;
12751 		}
12752 
12753 		ret_t = btf_type_by_id(ret_btf, ret_btf_id);
12754 		if (!ret_t || !__btf_type_is_struct(ret_t)) {
12755 			verbose(env, "bpf_obj_new/bpf_percpu_obj_new type ID argument must be of a struct\n");
12756 			return -EINVAL;
12757 		}
12758 
12759 		if (is_bpf_percpu_obj_new_kfunc(meta->func_id)) {
12760 			if (ret_t->size > BPF_GLOBAL_PERCPU_MA_MAX_SIZE) {
12761 				verbose(env, "bpf_percpu_obj_new type size (%d) is greater than %d\n",
12762 					ret_t->size, BPF_GLOBAL_PERCPU_MA_MAX_SIZE);
12763 				return -EINVAL;
12764 			}
12765 
12766 			if (!bpf_global_percpu_ma_set) {
12767 				mutex_lock(&bpf_percpu_ma_lock);
12768 				if (!bpf_global_percpu_ma_set) {
12769 					/* Charge memory allocated with bpf_global_percpu_ma to
12770 					 * root memcg. The obj_cgroup for root memcg is NULL.
12771 					 */
12772 					err = bpf_mem_alloc_percpu_init(&bpf_global_percpu_ma, NULL);
12773 					if (!err)
12774 						bpf_global_percpu_ma_set = true;
12775 				}
12776 				mutex_unlock(&bpf_percpu_ma_lock);
12777 				if (err)
12778 					return err;
12779 			}
12780 
12781 			mutex_lock(&bpf_percpu_ma_lock);
12782 			err = bpf_mem_alloc_percpu_unit_init(&bpf_global_percpu_ma, ret_t->size);
12783 			mutex_unlock(&bpf_percpu_ma_lock);
12784 			if (err)
12785 				return err;
12786 		}
12787 
12788 		struct_meta = btf_find_struct_meta(ret_btf, ret_btf_id);
12789 		if (is_bpf_percpu_obj_new_kfunc(meta->func_id)) {
12790 			if (!__btf_type_is_scalar_struct(env, ret_btf, ret_t, 0)) {
12791 				verbose(env, "bpf_percpu_obj_new type ID argument must be of a struct of scalars\n");
12792 				return -EINVAL;
12793 			}
12794 
12795 			if (struct_meta) {
12796 				verbose(env, "bpf_percpu_obj_new type ID argument must not contain special fields\n");
12797 				return -EINVAL;
12798 			}
12799 		}
12800 
12801 		mark_reg_known_zero(env, regs, BPF_REG_0);
12802 		regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC;
12803 		regs[BPF_REG_0].btf = ret_btf;
12804 		regs[BPF_REG_0].btf_id = ret_btf_id;
12805 		if (is_bpf_percpu_obj_new_kfunc(meta->func_id))
12806 			regs[BPF_REG_0].type |= MEM_PERCPU;
12807 
12808 		insn_aux->obj_new_size = ret_t->size;
12809 		insn_aux->kptr_struct_meta = struct_meta;
12810 	} else if (is_bpf_refcount_acquire_kfunc(meta->func_id)) {
12811 		mark_reg_known_zero(env, regs, BPF_REG_0);
12812 		regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC;
12813 		regs[BPF_REG_0].btf = meta->arg_btf;
12814 		regs[BPF_REG_0].btf_id = meta->arg_btf_id;
12815 
12816 		insn_aux->kptr_struct_meta =
12817 			btf_find_struct_meta(meta->arg_btf,
12818 					     meta->arg_btf_id);
12819 	} else if (is_list_node_type(ptr_type)) {
12820 		struct btf_field *field = meta->arg_list_head.field;
12821 
12822 		mark_reg_graph_node(regs, BPF_REG_0, &field->graph_root);
12823 	} else if (is_rbtree_node_type(ptr_type)) {
12824 		struct btf_field *field = meta->arg_rbtree_root.field;
12825 
12826 		mark_reg_graph_node(regs, BPF_REG_0, &field->graph_root);
12827 	} else if (meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx]) {
12828 		mark_reg_known_zero(env, regs, BPF_REG_0);
12829 		regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_TRUSTED;
12830 		regs[BPF_REG_0].btf = desc_btf;
12831 		regs[BPF_REG_0].btf_id = meta->ret_btf_id;
12832 	} else if (meta->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
12833 		ret_t = btf_type_by_id(desc_btf, meta->arg_constant.value);
12834 		if (!ret_t) {
12835 			verbose(env, "Unknown type ID %lld passed to kfunc bpf_rdonly_cast\n",
12836 				meta->arg_constant.value);
12837 			return -EINVAL;
12838 		} else if (btf_type_is_struct(ret_t)) {
12839 			mark_reg_known_zero(env, regs, BPF_REG_0);
12840 			regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_UNTRUSTED;
12841 			regs[BPF_REG_0].btf = desc_btf;
12842 			regs[BPF_REG_0].btf_id = meta->arg_constant.value;
12843 		} else if (btf_type_is_void(ret_t)) {
12844 			mark_reg_known_zero(env, regs, BPF_REG_0);
12845 			regs[BPF_REG_0].type = PTR_TO_MEM | MEM_RDONLY | PTR_UNTRUSTED;
12846 			regs[BPF_REG_0].mem_size = 0;
12847 		} else {
12848 			verbose(env,
12849 				"kfunc bpf_rdonly_cast type ID argument must be of a struct or void\n");
12850 			return -EINVAL;
12851 		}
12852 	} else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_slice] ||
12853 		   meta->func_id == special_kfunc_list[KF_bpf_dynptr_slice_rdwr]) {
12854 		enum bpf_type_flag type_flag = get_dynptr_type_flag(meta->dynptr.type);
12855 
12856 		mark_reg_known_zero(env, regs, BPF_REG_0);
12857 
12858 		if (!meta->arg_constant.found) {
12859 			verifier_bug(env, "bpf_dynptr_slice(_rdwr) no constant size");
12860 			return -EFAULT;
12861 		}
12862 
12863 		regs[BPF_REG_0].mem_size = meta->arg_constant.value;
12864 
12865 		/* PTR_MAYBE_NULL will be added when is_kfunc_ret_null is checked */
12866 		regs[BPF_REG_0].type = PTR_TO_MEM | type_flag;
12867 
12868 		if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_slice]) {
12869 			regs[BPF_REG_0].type |= MEM_RDONLY;
12870 		} else {
12871 			/* this will set env->seen_direct_write to true */
12872 			if (!may_access_direct_pkt_data(env, NULL, BPF_WRITE)) {
12873 				verbose(env, "the prog does not allow writes to packet data\n");
12874 				return -EINVAL;
12875 			}
12876 		}
12877 
12878 		if (!meta->dynptr.id) {
12879 			verifier_bug(env, "no dynptr id");
12880 			return -EFAULT;
12881 		}
12882 		regs[BPF_REG_0].parent_id = meta->dynptr.id;
12883 	} else {
12884 		return 0;
12885 	}
12886 
12887 	return 1;
12888 }
12889 
12890 static int check_return_code(struct bpf_verifier_env *env, int regno, const char *reg_name);
12891 
/*
 * check_kfunc_call() - verify one call instruction that targets a kernel
 * function (kfunc) and model its effect on the current verifier state.
 *
 * @env:        verifier environment
 * @insn:       the call instruction; insn->imm and insn->off are handed to
 *              bpf_fetch_kfunc_arg_meta() to look the kfunc up
 * @insn_idx_p: index of @insn; it is only read here, never written
 *
 * In order: fetch the kfunc metadata, reject calls the program context does
 * not permit, check the arguments via check_kfunc_args(), push callback
 * verification for the callback-taking kfuncs, update the RCU / preempt
 * nesting counters, release or convert references, clobber the caller-saved
 * registers and finally describe R0 according to the BTF return type.
 *
 * Return: 0 on success (also when insn->imm is 0), a negative errno when the
 * call is rejected. When bpf_is_throw_kfunc(insn) is true the result of
 * process_bpf_exit_full() is returned instead.
 */
12892 static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
12893 			    int *insn_idx_p)
12894 {
12895 	bool sleepable, rcu_lock, rcu_unlock, preempt_disable, preempt_enable;
12896 	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
12897 	struct bpf_reg_state *regs = cur_regs(env);
12898 	const char *func_name, *ptr_type_name;
12899 	const struct btf_type *t, *ptr_type;
12900 	struct bpf_kfunc_call_arg_meta meta;
12901 	struct bpf_insn_aux_data *insn_aux;
12902 	int err, insn_idx = *insn_idx_p;
12903 	const struct btf_param *args;
12904 	u32 i, nargs, ptr_type_id;
12905 	struct btf *desc_btf;
12906 	int id;
12907 
12908 	/* skip for now, but return error when we find this in fixup_kfunc_call */
12909 	if (!insn->imm)
12910 		return 0;
12911 
12912 	err = bpf_fetch_kfunc_arg_meta(env, insn->imm, insn->off, &meta);
12913 	if (err == -EACCES && meta.func_name)
12914 		verbose(env, "calling kernel function %s is not allowed\n", meta.func_name);
12915 	if (err)
12916 		return err;
12917 	desc_btf = meta.btf;
12918 	func_name = meta.func_name;
12919 	insn_aux = &env->insn_aux_data[insn_idx];
12920 
12921 	insn_aux->is_iter_next = bpf_is_iter_next_kfunc(&meta);
12922 
	/*
	 * For the resilient spin lock kfuncs, fork a second verifier state
	 * that continues at the next instruction and models a failed lock
	 * acquisition: in that state R0 is limited to [-MAX_ERRNO, -1].
	 * Note that the block-local 'regs' below shadows the function-level
	 * 'regs' and points into the forked state, not the current one.
	 */
12923 	if (!insn->off &&
12924 	    (insn->imm == special_kfunc_list[KF_bpf_res_spin_lock] ||
12925 	     insn->imm == special_kfunc_list[KF_bpf_res_spin_lock_irqsave])) {
12926 		struct bpf_verifier_state *branch;
12927 		struct bpf_reg_state *regs;
12928 
12929 		branch = push_stack(env, env->insn_idx + 1, env->insn_idx, false);
12930 		if (IS_ERR(branch)) {
12931 			verbose(env, "failed to push state for failed lock acquisition\n");
12932 			return PTR_ERR(branch);
12933 		}
12934 
12935 		regs = branch->frame[branch->curframe]->regs;
12936 
12937 		/* Clear r0-r5 registers in forked state */
12938 		for (i = 0; i < CALLER_SAVED_REGS; i++)
12939 			bpf_mark_reg_not_init(env, &regs[caller_saved[i]]);
12940 
12941 		mark_reg_unknown(env, regs, BPF_REG_0);
12942 		err = __mark_reg_s32_range(env, regs, BPF_REG_0, -MAX_ERRNO, -1);
12943 		if (err) {
12944 			verbose(env, "failed to mark s32 range for retval in forked state for lock\n");
12945 			return err;
12946 		}
12947 		__mark_btf_func_reg_size(env, regs, BPF_REG_0, sizeof(u32));
12948 	} else if (!insn->off && insn->imm == special_kfunc_list[KF___bpf_trap]) {
12949 		verbose(env, "unexpected __bpf_trap() due to uninitialized variable?\n");
12950 		return -EFAULT;
12951 	}
12952 
12953 	if (is_kfunc_destructive(&meta) && !capable(CAP_SYS_BOOT)) {
12954 		verbose(env, "destructive kfunc calls require CAP_SYS_BOOT capability\n");
12955 		return -EACCES;
12956 	}
12957 
12958 	sleepable = bpf_is_kfunc_sleepable(&meta);
12959 	if (sleepable && !in_sleepable(env)) {
12960 		verbose(env, "program must be sleepable to call sleepable kfunc %s\n", func_name);
12961 		return -EACCES;
12962 	}
12963 
12964 	/* Track non-sleepable context for kfuncs, same as for helpers. */
12965 	if (!in_sleepable_context(env))
12966 		insn_aux->non_sleepable = true;
12967 
12968 	/* Check the arguments */
12969 	err = check_kfunc_args(env, &meta, insn_idx);
12970 	if (err < 0)
12971 		return err;
12972 
12973 	if ((is_bpf_obj_drop_kfunc(meta.func_id) ||
12974 	     is_bpf_percpu_obj_drop_kfunc(meta.func_id)) && (is_tracing_prog_type(prog_type) ||
12975 	     /* is_tracing_prog_type() for now doesn't cover non-iterator tracing progs. */
12976 	     (prog_type == BPF_PROG_TYPE_TRACING && env->prog->expected_attach_type != BPF_TRACE_ITER
12977 	      && !env->prog->sleepable))) {
12978 		struct btf_struct_meta *struct_meta;
12979 
12980 		struct_meta = btf_find_struct_meta(meta.arg_btf, meta.arg_btf_id);
12981 		if (struct_meta && btf_record_has_nmi_unsafe_fields(struct_meta->record)) {
12982 			verbose(env, "%s cannot be used in tracing programs on types with NMI unsafe fields\n",
12983 				func_name);
12984 			return -EINVAL;
12985 		}
12986 	}
12987 
12988 	if (is_bpf_rbtree_add_kfunc(meta.func_id)) {
12989 		err = push_callback_call(env, insn, insn_idx, meta.subprogno,
12990 					 set_rbtree_add_callback_state);
12991 		if (err) {
12992 			verbose(env, "kfunc %s#%d failed callback verification\n",
12993 				func_name, meta.func_id);
12994 			return err;
12995 		}
12996 	}
12997 
	/*
	 * Pre-set the R0 memory size and writability for bpf_session_cookie;
	 * these fields are consumed by the PTR_TO_MEM return handling below.
	 */
12998 	if (meta.func_id == special_kfunc_list[KF_bpf_session_cookie]) {
12999 		meta.r0_size = sizeof(u64);
13000 		meta.r0_rdonly = false;
13001 	}
13002 
13003 	if (is_bpf_wq_set_callback_kfunc(meta.func_id)) {
13004 		err = push_callback_call(env, insn, insn_idx, meta.subprogno,
13005 					 set_timer_callback_state);
13006 		if (err) {
13007 			verbose(env, "kfunc %s#%d failed callback verification\n",
13008 				func_name, meta.func_id);
13009 			return err;
13010 		}
13011 	}
13012 
13013 	if (is_task_work_add_kfunc(meta.func_id)) {
13014 		err = push_callback_call(env, insn, insn_idx, meta.subprogno,
13015 					 set_task_work_schedule_callback_state);
13016 		if (err) {
13017 			verbose(env, "kfunc %s#%d failed callback verification\n",
13018 				func_name, meta.func_id);
13019 			return err;
13020 		}
13021 	}
13022 
13023 	rcu_lock = is_kfunc_bpf_rcu_read_lock(&meta);
13024 	rcu_unlock = is_kfunc_bpf_rcu_read_unlock(&meta);
13025 
13026 	preempt_disable = is_kfunc_bpf_preempt_disable(&meta);
13027 	preempt_enable = is_kfunc_bpf_preempt_enable(&meta);
13028 
	/*
	 * Keep the RCU and preempt-disable nesting counters of the current
	 * state balanced; an unlock/enable with a zero counter is rejected.
	 * Leaving the outermost RCU section invalidates RCU protected refs.
	 */
13029 	if (rcu_lock) {
13030 		env->cur_state->active_rcu_locks++;
13031 	} else if (rcu_unlock) {
13032 		if (env->cur_state->active_rcu_locks == 0) {
13033 			verbose(env, "unmatched rcu read unlock (kernel function %s)\n", func_name);
13034 			return -EINVAL;
13035 		}
13036 		if (--env->cur_state->active_rcu_locks == 0)
13037 			invalidate_rcu_protected_refs(env);
13038 	} else if (preempt_disable) {
13039 		env->cur_state->active_preempt_locks++;
13040 	} else if (preempt_enable) {
13041 		if (env->cur_state->active_preempt_locks == 0) {
13042 			verbose(env, "unmatched attempt to enable preemption (kernel function %s)\n", func_name);
13043 			return -EINVAL;
13044 		}
13045 		env->cur_state->active_preempt_locks--;
13046 	}
13047 
	/* Evaluated after the counters above were updated for this call. */
13048 	if (sleepable && !in_sleepable_context(env)) {
13049 		verbose(env, "kernel func %s is sleepable within %s\n",
13050 			func_name, non_sleepable_context_description(env));
13051 		return -EACCES;
13052 	}
13053 
13054 	if (in_rbtree_lock_required_cb(env) && (rcu_lock || rcu_unlock)) {
13055 		verbose(env, "Calling bpf_rcu_read_{lock,unlock} in unnecessary rbtree callback\n");
13056 		return -EACCES;
13057 	}
13058 
13059 	if (is_kfunc_rcu_protected(&meta) && !in_rcu_cs(env)) {
13060 		verbose(env, "kernel func %s requires RCU critical section protection\n", func_name);
13061 		return -EACCES;
13062 	}
13063 
13064 	/* In case of release function, we get register number of refcounted
13065 	 * PTR_TO_BTF_ID in bpf_kfunc_arg_meta, do the release now.
13066 	 */
13067 	if (meta.release_regno) {
13068 		err = release_reg(env, &regs[meta.release_regno], false, !!meta.dynptr.id);
13069 		if (err)
13070 			return err;
13071 	}
13072 
	/*
	 * List push / rbtree add: remember the node offset and struct meta
	 * in the per-insn aux data and convert the reference identified by
	 * R2's id from owning to non-owning.
	 */
13073 	if (is_bpf_list_push_kfunc(meta.func_id) || is_bpf_rbtree_add_kfunc(meta.func_id)) {
13074 		id = regs[BPF_REG_2].id;
13075 		insn_aux->insert_off = regs[BPF_REG_2].var_off.value;
13076 		insn_aux->kptr_struct_meta = btf_find_struct_meta(meta.arg_btf, meta.arg_btf_id);
13077 		ref_convert_owning_non_owning(env, id);
13078 	}
13079 
13080 	if (meta.func_id == special_kfunc_list[KF_bpf_throw]) {
13081 		if (!bpf_jit_supports_exceptions()) {
13082 			verbose(env, "JIT does not support calling kfunc %s#%d\n",
13083 				func_name, meta.func_id);
13084 			return -ENOTSUPP;
13085 		}
13086 		env->seen_exception = true;
13087 
13088 		/* In the case of the default callback, the cookie value passed
13089 		 * to bpf_throw becomes the return value of the program.
13090 		 */
13091 		if (!env->exception_callback_subprog) {
13092 			err = check_return_code(env, BPF_REG_1, "R1");
13093 			if (err < 0)
13094 				return err;
13095 		}
13096 	}
13097 
	/* The call clobbers every caller-saved register of the current state. */
13098 	for (i = 0; i < CALLER_SAVED_REGS; i++) {
13099 		u32 regno = caller_saved[i];
13100 
13101 		bpf_mark_reg_not_init(env, &regs[regno]);
13102 		regs[regno].subreg_def = DEF_NOT_SUBREG;
13103 	}
13104 	invalidate_outgoing_stack_args(env, cur_func(env));
13105 
13106 	/* Check return type */
13107 	t = btf_type_skip_modifiers(desc_btf, meta.func_proto->type, NULL);
13108 
13109 	if (is_kfunc_acquire(&meta) && !btf_type_is_struct_ptr(meta.btf, t)) {
13110 		if (meta.btf != btf_vmlinux ||
13111 		    (!is_bpf_obj_new_kfunc(meta.func_id) &&
13112 		     !is_bpf_percpu_obj_new_kfunc(meta.func_id) &&
13113 		     !is_bpf_refcount_acquire_kfunc(meta.func_id))) {
13114 			verbose(env, "acquire kernel function does not return PTR_TO_BTF_ID\n");
13115 			return -EINVAL;
13116 		}
13117 	}
13118 
	/*
	 * Describe R0. Three cases by BTF return type: scalar, pointer
	 * (special kfuncs first, then void *, non-struct memory, struct),
	 * and void.
	 */
13119 	if (btf_type_is_scalar(t)) {
13120 		mark_reg_unknown(env, regs, BPF_REG_0);
		/* In the current (non-forked) state the lock kfuncs return 0. */
13121 		if (meta.btf == btf_vmlinux && (meta.func_id == special_kfunc_list[KF_bpf_res_spin_lock] ||
13122 		    meta.func_id == special_kfunc_list[KF_bpf_res_spin_lock_irqsave]))
13123 			__mark_reg_const_zero(env, &regs[BPF_REG_0]);
13124 		mark_btf_func_reg_size(env, BPF_REG_0, t->size);
13125 	} else if (btf_type_is_ptr(t)) {
13126 		ptr_type = btf_type_skip_modifiers(desc_btf, t->type, &ptr_type_id);
		/*
		 * check_special_kfunc() returns <0 on error, >0 when it has
		 * already set up R0, and 0 when the generic handling below
		 * must do it.
		 */
13127 		err = check_special_kfunc(env, &meta, regs, insn_aux, ptr_type, desc_btf);
13128 		if (err) {
13129 			if (err < 0)
13130 				return err;
13131 		} else if (btf_type_is_void(ptr_type)) {
13132 			/* kfunc returning 'void *' is equivalent to returning scalar */
13133 			mark_reg_unknown(env, regs, BPF_REG_0);
13134 		} else if (!__btf_type_is_struct(ptr_type)) {
			/*
			 * Without an explicit size, fall back to the resolved
			 * size of the pointee and treat the memory as read-only.
			 */
13135 			if (!meta.r0_size) {
13136 				__u32 sz;
13137 
13138 				if (!IS_ERR(btf_resolve_size(desc_btf, ptr_type, &sz))) {
13139 					meta.r0_size = sz;
13140 					meta.r0_rdonly = true;
13141 				}
13142 			}
13143 			if (!meta.r0_size) {
13144 				ptr_type_name = btf_name_by_offset(desc_btf,
13145 								   ptr_type->name_off);
13146 				verbose(env,
13147 					"kernel function %s returns pointer type %s %s is not supported\n",
13148 					func_name,
13149 					btf_type_str(ptr_type),
13150 					ptr_type_name);
13151 				return -EINVAL;
13152 			}
13153 
13154 			mark_reg_known_zero(env, regs, BPF_REG_0);
13155 			regs[BPF_REG_0].type = PTR_TO_MEM;
13156 			regs[BPF_REG_0].mem_size = meta.r0_size;
13157 
13158 			if (meta.r0_rdonly)
13159 				regs[BPF_REG_0].type |= MEM_RDONLY;
13160 
13161 			/* Ensures we don't access the memory after a release_reference() */
13162 			if (meta.ref_obj.id) {
13163 				err = validate_ref_obj(env, &meta.ref_obj);
13164 				if (err)
13165 					return err;
13166 				regs[BPF_REG_0].parent_id = meta.ref_obj.id;
13167 			}
13168 
13169 			if (is_kfunc_rcu_protected(&meta))
13170 				regs[BPF_REG_0].type |= MEM_RCU;
13171 		} else {
13172 			enum bpf_reg_type type = PTR_TO_BTF_ID;
13173 
13174 			if (meta.func_id == special_kfunc_list[KF_bpf_get_kmem_cache])
13175 				type |= PTR_UNTRUSTED;
13176 			else if (is_kfunc_rcu_protected(&meta) ||
13177 				 (bpf_is_iter_next_kfunc(&meta) &&
13178 				  (get_iter_from_state(env->cur_state, &meta)
13179 					   ->type & MEM_RCU))) {
13180 				/*
13181 				 * If the iterator's constructor (the _new
13182 				 * function e.g., bpf_iter_task_new) has been
13183 				 * annotated with BPF kfunc flag
13184 				 * KF_RCU_PROTECTED and was called within a RCU
13185 				 * read-side critical section, also propagate
13186 				 * the MEM_RCU flag to the pointer returned from
13187 				 * the iterator's next function (e.g.,
13188 				 * bpf_iter_task_next).
13189 				 */
13190 				type |= MEM_RCU;
13191 			} else {
13192 				/*
13193 				 * Any PTR_TO_BTF_ID that is returned from a BPF
13194 				 * kfunc should by default be treated as
13195 				 * implicitly trusted.
13196 				 */
13197 				type |= PTR_TRUSTED;
13198 			}
13199 
13200 			mark_reg_known_zero(env, regs, BPF_REG_0);
13201 			regs[BPF_REG_0].btf = desc_btf;
13202 			regs[BPF_REG_0].type = type;
13203 			regs[BPF_REG_0].btf_id = ptr_type_id;
13204 		}
13205 
13206 		if (is_kfunc_ret_null(&meta)) {
13207 			regs[BPF_REG_0].type |= PTR_MAYBE_NULL;
13208 			/* For mark_ptr_or_null_reg, see 93c230e3f5bd6 */
13209 			regs[BPF_REG_0].id = ++env->id_gen;
13210 		}
13211 		mark_btf_func_reg_size(env, BPF_REG_0, sizeof(void *));
		/*
		 * An acquire kfunc overwrites any id assigned above with the
		 * id returned by acquire_reference().
		 */
13212 		if (is_kfunc_acquire(&meta)) {
13213 			id = acquire_reference(env, insn_idx, 0);
13214 			if (id < 0)
13215 				return id;
13216 			regs[BPF_REG_0].id = id;
13217 		} else if (is_rbtree_node_type(ptr_type) || is_list_node_type(ptr_type)) {
13218 			ref_set_non_owning(env, &regs[BPF_REG_0]);
13219 		}
13220 
13221 		if (reg_may_point_to_spin_lock(&regs[BPF_REG_0]) && !regs[BPF_REG_0].id)
13222 			regs[BPF_REG_0].id = ++env->id_gen;
13223 	} else if (btf_type_is_void(t)) {
13224 		if (meta.btf == btf_vmlinux) {
13225 			if (is_bpf_obj_drop_kfunc(meta.func_id) ||
13226 			    is_bpf_percpu_obj_drop_kfunc(meta.func_id)) {
13227 				insn_aux->kptr_struct_meta =
13228 					btf_find_struct_meta(meta.arg_btf,
13229 							     meta.arg_btf_id);
13230 			}
13231 		}
13232 	}
13233 
13234 	if (bpf_is_kfunc_pkt_changing(&meta))
13235 		clear_all_pkt_pointers(env);
13236 
	/*
	 * Arguments beyond MAX_BPF_FUNC_REG_ARGS do not fit in registers;
	 * raise the calling subprog's stack_arg_cnt if this call needs more
	 * than what has been recorded so far.
	 */
13237 	nargs = btf_type_vlen(meta.func_proto);
13238 	if (nargs > MAX_BPF_FUNC_REG_ARGS) {
13239 		struct bpf_func_state *caller = cur_func(env);
13240 		struct bpf_subprog_info *caller_info = &env->subprog_info[caller->subprogno];
13241 		u16 out_stack_arg_cnt = nargs - MAX_BPF_FUNC_REG_ARGS;
13242 		u16 stack_arg_cnt = bpf_in_stack_arg_cnt(caller_info) + out_stack_arg_cnt;
13243 
13244 		if (stack_arg_cnt > caller_info->stack_arg_cnt)
13245 			caller_info->stack_arg_cnt = stack_arg_cnt;
13246 	}
13247 
	/* Record the size of each register-passed argument (R1 upwards). */
13248 	args = (const struct btf_param *)(meta.func_proto + 1);
13249 	for (i = 0; i < min_t(int, nargs, MAX_BPF_FUNC_REG_ARGS); i++) {
13250 		u32 regno = i + 1;
13251 
13252 		t = btf_type_skip_modifiers(desc_btf, args[i].type, NULL);
13253 		if (btf_type_is_ptr(t))
13254 			mark_btf_func_reg_size(env, regno, sizeof(void *));
13255 		else
13256 			/* scalar. ensured by check_kfunc_args() */
13257 			mark_btf_func_reg_size(env, regno, t->size);
13258 	}
13259 
13260 	if (bpf_is_iter_next_kfunc(&meta)) {
13261 		err = process_iter_next_call(env, insn_idx, &meta);
13262 		if (err)
13263 			return err;
13264 	}
13265 
13266 	if (meta.func_id == special_kfunc_list[KF_bpf_session_cookie])
13267 		env->prog->call_session_cookie = true;
13268 
13269 	if (bpf_is_throw_kfunc(insn))
13270 		return process_bpf_exit_full(env, NULL, true);
13271 
13272 	return 0;
13273 }
13274 
13275 static bool check_reg_sane_offset_scalar(struct bpf_verifier_env *env,
13276 					 const struct bpf_reg_state *reg,
13277 					 enum bpf_reg_type type)
13278 {
13279 	bool known = tnum_is_const(reg->var_off);
13280 	s64 val = reg->var_off.value;
13281 	s64 smin = reg_smin(reg);
13282 
13283 	if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
13284 		verbose(env, "math between %s pointer and %lld is not allowed\n",
13285 			reg_type_str(env, type), val);
13286 		return false;
13287 	}
13288 
13289 	if (smin == S64_MIN) {
13290 		verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n",
13291 			reg_type_str(env, type));
13292 		return false;
13293 	}
13294 
13295 	if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
13296 		verbose(env, "value %lld makes %s pointer be out of bounds\n",
13297 			smin, reg_type_str(env, type));
13298 		return false;
13299 	}
13300 
13301 	return true;
13302 }
13303 
13304 static bool check_reg_sane_offset_ptr(struct bpf_verifier_env *env,
13305 				      const struct bpf_reg_state *reg,
13306 				      enum bpf_reg_type type)
13307 {
13308 	bool known = tnum_is_const(reg->var_off);
13309 	s64 val = reg->var_off.value;
13310 	s64 smin = reg_smin(reg);
13311 
13312 	if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
13313 		verbose(env, "%s pointer offset %lld is not allowed\n",
13314 			reg_type_str(env, type), val);
13315 		return false;
13316 	}
13317 
13318 	if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
13319 		verbose(env, "%s pointer offset %lld is not allowed\n",
13320 			reg_type_str(env, type), smin);
13321 		return false;
13322 	}
13323 
13324 	return true;
13325 }
13326 
/*
 * Negative, non-errno reason codes returned by the pointer ALU sanitation
 * helpers; sanitize_err() turns them into a log message and an errno.
 */
enum {
	/* offset scalar has mixed signed bounds */
	REASON_BOUNDS	= -1,
	/* pointer type has no ALU limit defined */
	REASON_TYPE	= -2,
	/* same insn reached with a different alu state or limit */
	REASON_PATHS	= -3,
	/* computed limit is not below the allowed maximum */
	REASON_LIMIT	= -4,
	/* the speculative state could not be pushed */
	REASON_STACK	= -5,
};
13334 
13335 static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
13336 			      u32 *alu_limit, bool mask_to_left)
13337 {
13338 	u32 max = 0, ptr_limit = 0;
13339 
13340 	switch (ptr_reg->type) {
13341 	case PTR_TO_STACK:
13342 		/* Offset 0 is out-of-bounds, but acceptable start for the
13343 		 * left direction, see BPF_REG_FP. Also, unknown scalar
13344 		 * offset where we would need to deal with min/max bounds is
13345 		 * currently prohibited for unprivileged.
13346 		 */
13347 		max = MAX_BPF_STACK + mask_to_left;
13348 		ptr_limit = -ptr_reg->var_off.value;
13349 		break;
13350 	case PTR_TO_MAP_VALUE:
13351 		max = ptr_reg->map_ptr->value_size;
13352 		ptr_limit = mask_to_left ? reg_smin(ptr_reg) : reg_umax(ptr_reg);
13353 		break;
13354 	default:
13355 		return REASON_TYPE;
13356 	}
13357 
13358 	if (ptr_limit >= max)
13359 		return REASON_LIMIT;
13360 	*alu_limit = ptr_limit;
13361 	return 0;
13362 }
13363 
13364 static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env,
13365 				    const struct bpf_insn *insn)
13366 {
13367 	return env->bypass_spec_v1 ||
13368 		BPF_SRC(insn->code) == BPF_K ||
13369 		cur_aux(env)->nospec;
13370 }
13371 
13372 static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux,
13373 				       u32 alu_state, u32 alu_limit)
13374 {
13375 	/* If we arrived here from different branches with different
13376 	 * state or limits to sanitize, then this won't work.
13377 	 */
13378 	if (aux->alu_state &&
13379 	    (aux->alu_state != alu_state ||
13380 	     aux->alu_limit != alu_limit))
13381 		return REASON_PATHS;
13382 
13383 	/* Corresponding fixup done in do_misc_fixups(). */
13384 	aux->alu_state = alu_state;
13385 	aux->alu_limit = alu_limit;
13386 	return 0;
13387 }
13388 
13389 static int sanitize_val_alu(struct bpf_verifier_env *env,
13390 			    struct bpf_insn *insn)
13391 {
13392 	struct bpf_insn_aux_data *aux = cur_aux(env);
13393 
13394 	if (can_skip_alu_sanitation(env, insn))
13395 		return 0;
13396 
13397 	return update_alu_sanitation_state(aux, BPF_ALU_NON_POINTER, 0);
13398 }
13399 
13400 static bool sanitize_needed(u8 opcode)
13401 {
13402 	return opcode == BPF_ADD || opcode == BPF_SUB;
13403 }
13404 
13405 struct bpf_sanitize_info {
13406 	struct bpf_insn_aux_data aux;
13407 	bool mask_to_left;
13408 };
13409 
13410 static int sanitize_speculative_path(struct bpf_verifier_env *env,
13411 				     const struct bpf_insn *insn,
13412 				     u32 next_idx, u32 curr_idx)
13413 {
13414 	struct bpf_verifier_state *branch;
13415 	struct bpf_reg_state *regs;
13416 
13417 	branch = push_stack(env, next_idx, curr_idx, true);
13418 	if (!IS_ERR(branch) && insn) {
13419 		regs = branch->frame[branch->curframe]->regs;
13420 		if (BPF_SRC(insn->code) == BPF_K) {
13421 			mark_reg_unknown(env, regs, insn->dst_reg);
13422 		} else if (BPF_SRC(insn->code) == BPF_X) {
13423 			mark_reg_unknown(env, regs, insn->dst_reg);
13424 			mark_reg_unknown(env, regs, insn->src_reg);
13425 		}
13426 	}
13427 	return PTR_ERR_OR_ZERO(branch);
13428 }
13429 
13430 static int sanitize_ptr_alu(struct bpf_verifier_env *env,
13431 			    struct bpf_insn *insn,
13432 			    const struct bpf_reg_state *ptr_reg,
13433 			    const struct bpf_reg_state *off_reg,
13434 			    struct bpf_reg_state *dst_reg,
13435 			    struct bpf_sanitize_info *info,
13436 			    const bool commit_window)
13437 {
13438 	struct bpf_insn_aux_data *aux = commit_window ? cur_aux(env) : &info->aux;
13439 	struct bpf_verifier_state *vstate = env->cur_state;
13440 	bool off_is_imm = tnum_is_const(off_reg->var_off);
13441 	bool off_is_neg = reg_smin(off_reg) < 0;
13442 	bool ptr_is_dst_reg = ptr_reg == dst_reg;
13443 	u8 opcode = BPF_OP(insn->code);
13444 	u32 alu_state, alu_limit;
13445 	struct bpf_reg_state tmp;
13446 	int err;
13447 
13448 	if (can_skip_alu_sanitation(env, insn))
13449 		return 0;
13450 
13451 	/* We already marked aux for masking from non-speculative
13452 	 * paths, thus we got here in the first place. We only care
13453 	 * to explore bad access from here.
13454 	 */
13455 	if (vstate->speculative)
13456 		goto do_sim;
13457 
13458 	if (!commit_window) {
13459 		if (!tnum_is_const(off_reg->var_off) &&
13460 		    (reg_smin(off_reg) < 0) != (reg_smax(off_reg) < 0))
13461 			return REASON_BOUNDS;
13462 
13463 		info->mask_to_left = (opcode == BPF_ADD &&  off_is_neg) ||
13464 				     (opcode == BPF_SUB && !off_is_neg);
13465 	}
13466 
13467 	err = retrieve_ptr_limit(ptr_reg, &alu_limit, info->mask_to_left);
13468 	if (err < 0)
13469 		return err;
13470 
13471 	if (commit_window) {
13472 		/* In commit phase we narrow the masking window based on
13473 		 * the observed pointer move after the simulated operation.
13474 		 */
13475 		alu_state = info->aux.alu_state;
13476 		alu_limit = abs(info->aux.alu_limit - alu_limit);
13477 	} else {
13478 		alu_state  = off_is_neg ? BPF_ALU_NEG_VALUE : 0;
13479 		alu_state |= off_is_imm ? BPF_ALU_IMMEDIATE : 0;
13480 		alu_state |= ptr_is_dst_reg ?
13481 			     BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST;
13482 
13483 		/* Limit pruning on unknown scalars to enable deep search for
13484 		 * potential masking differences from other program paths.
13485 		 */
13486 		if (!off_is_imm)
13487 			env->explore_alu_limits = true;
13488 	}
13489 
13490 	err = update_alu_sanitation_state(aux, alu_state, alu_limit);
13491 	if (err < 0)
13492 		return err;
13493 do_sim:
13494 	/* If we're in commit phase, we're done here given we already
13495 	 * pushed the truncated dst_reg into the speculative verification
13496 	 * stack.
13497 	 *
13498 	 * Also, when register is a known constant, we rewrite register-based
13499 	 * operation to immediate-based, and thus do not need masking (and as
13500 	 * a consequence, do not need to simulate the zero-truncation either).
13501 	 */
13502 	if (commit_window || off_is_imm)
13503 		return 0;
13504 
13505 	/* Simulate and find potential out-of-bounds access under
13506 	 * speculative execution from truncation as a result of
13507 	 * masking when off was not within expected range. If off
13508 	 * sits in dst, then we temporarily need to move ptr there
13509 	 * to simulate dst (== 0) +/-= ptr. Needed, for example,
13510 	 * for cases where we use K-based arithmetic in one direction
13511 	 * and truncated reg-based in the other in order to explore
13512 	 * bad access.
13513 	 */
13514 	if (!ptr_is_dst_reg) {
13515 		tmp = *dst_reg;
13516 		*dst_reg = *ptr_reg;
13517 	}
13518 	err = sanitize_speculative_path(env, NULL, env->insn_idx + 1, env->insn_idx);
13519 	if (err < 0)
13520 		return REASON_STACK;
13521 	if (!ptr_is_dst_reg)
13522 		*dst_reg = tmp;
13523 	return 0;
13524 }
13525 
13526 static void sanitize_mark_insn_seen(struct bpf_verifier_env *env)
13527 {
13528 	struct bpf_verifier_state *vstate = env->cur_state;
13529 
13530 	/* If we simulate paths under speculation, we don't update the
13531 	 * insn as 'seen' such that when we verify unreachable paths in
13532 	 * the non-speculative domain, sanitize_dead_code() can still
13533 	 * rewrite/sanitize them.
13534 	 */
13535 	if (!vstate->speculative)
13536 		env->insn_aux_data[env->insn_idx].seen = env->pass_cnt;
13537 }
13538 
13539 static int sanitize_err(struct bpf_verifier_env *env,
13540 			const struct bpf_insn *insn, int reason,
13541 			const struct bpf_reg_state *off_reg,
13542 			const struct bpf_reg_state *dst_reg)
13543 {
13544 	static const char *err = "pointer arithmetic with it prohibited for !root";
13545 	const char *op = BPF_OP(insn->code) == BPF_ADD ? "add" : "sub";
13546 	u32 dst = insn->dst_reg, src = insn->src_reg;
13547 
13548 	switch (reason) {
13549 	case REASON_BOUNDS:
13550 		verbose(env, "R%d has unknown scalar with mixed signed bounds, %s\n",
13551 			off_reg == dst_reg ? dst : src, err);
13552 		break;
13553 	case REASON_TYPE:
13554 		verbose(env, "R%d has pointer with unsupported alu operation, %s\n",
13555 			off_reg == dst_reg ? src : dst, err);
13556 		break;
13557 	case REASON_PATHS:
13558 		verbose(env, "R%d tried to %s from different maps, paths or scalars, %s\n",
13559 			dst, op, err);
13560 		break;
13561 	case REASON_LIMIT:
13562 		verbose(env, "R%d tried to %s beyond pointer bounds, %s\n",
13563 			dst, op, err);
13564 		break;
13565 	case REASON_STACK:
13566 		verbose(env, "R%d could not be pushed for speculative verification, %s\n",
13567 			dst, err);
13568 		return -ENOMEM;
13569 	default:
13570 		verifier_bug(env, "unknown reason (%d)", reason);
13571 		break;
13572 	}
13573 
13574 	return -EACCES;
13575 }
13576 
13577 /* check that stack access falls within stack limits and that 'reg' doesn't
13578  * have a variable offset.
13579  *
13580  * Variable offset is prohibited for unprivileged mode for simplicity since it
13581  * requires corresponding support in Spectre masking for stack ALU.  See also
13582  * retrieve_ptr_limit().
13583  */
13584 static int check_stack_access_for_ptr_arithmetic(
13585 				struct bpf_verifier_env *env,
13586 				int regno,
13587 				const struct bpf_reg_state *reg,
13588 				int off)
13589 {
13590 	if (!tnum_is_const(reg->var_off)) {
13591 		char tn_buf[48];
13592 
13593 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
13594 		verbose(env, "R%d variable stack access prohibited for !root, var_off=%s off=%d\n",
13595 			regno, tn_buf, off);
13596 		return -EACCES;
13597 	}
13598 
13599 	if (off >= 0 || off < -MAX_BPF_STACK) {
13600 		verbose(env, "R%d stack pointer arithmetic goes out of range, "
13601 			"prohibited for !root; off=%d\n", regno, off);
13602 		return -EACCES;
13603 	}
13604 
13605 	return 0;
13606 }
13607 
13608 static int sanitize_check_bounds(struct bpf_verifier_env *env,
13609 				 const struct bpf_insn *insn,
13610 				 struct bpf_reg_state *dst_reg)
13611 {
13612 	u32 dst = insn->dst_reg;
13613 
13614 	/* For unprivileged we require that resulting offset must be in bounds
13615 	 * in order to be able to sanitize access later on.
13616 	 */
13617 	if (env->bypass_spec_v1)
13618 		return 0;
13619 
13620 	switch (dst_reg->type) {
13621 	case PTR_TO_STACK:
13622 		if (check_stack_access_for_ptr_arithmetic(env, dst, dst_reg,
13623 							  dst_reg->var_off.value))
13624 			return -EACCES;
13625 		break;
13626 	case PTR_TO_MAP_VALUE:
13627 		if (check_map_access(env, dst_reg, argno_from_reg(dst), 0, 1, false, ACCESS_HELPER)) {
13628 			verbose(env, "R%d pointer arithmetic of map value goes out of range, "
13629 				"prohibited for !root\n", dst);
13630 			return -EACCES;
13631 		}
13632 		break;
13633 	default:
13634 		return -EOPNOTSUPP;
13635 	}
13636 
13637 	return 0;
13638 }
13639 
13640 /* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.
13641  * Caller should also handle BPF_MOV case separately.
13642  * If we return -EACCES, caller may want to try again treating pointer as a
13643  * scalar.  So we only emit a diagnostic if !env->allow_ptr_leaks.
13644  */
13645 static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
13646 				   struct bpf_insn *insn,
13647 				   const struct bpf_reg_state *ptr_reg,
13648 				   const struct bpf_reg_state *off_reg)
13649 {
13650 	struct bpf_verifier_state *vstate = env->cur_state;
13651 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
13652 	struct bpf_reg_state *regs = state->regs, *dst_reg;
13653 	bool known = tnum_is_const(off_reg->var_off);
13654 	s64 smin_val = reg_smin(off_reg), smax_val = reg_smax(off_reg);
13655 	u64 umin_val = reg_umin(off_reg), umax_val = reg_umax(off_reg);
13656 	struct bpf_sanitize_info info = {};
13657 	u8 opcode = BPF_OP(insn->code);
13658 	u32 dst = insn->dst_reg;
13659 	int ret, bounds_ret;
13660 
13661 	dst_reg = &regs[dst];
13662 
13663 	if ((known && (smin_val != smax_val || umin_val != umax_val)) ||
13664 	    smin_val > smax_val || umin_val > umax_val) {
13665 		/* Taint dst register if offset had invalid bounds derived from
13666 		 * e.g. dead branches.
13667 		 */
13668 		__mark_reg_unknown(env, dst_reg);
13669 		return 0;
13670 	}
13671 
13672 	if (BPF_CLASS(insn->code) != BPF_ALU64) {
13673 		/* 32-bit ALU ops on pointers produce (meaningless) scalars */
13674 		if (opcode == BPF_SUB && env->allow_ptr_leaks) {
13675 			__mark_reg_unknown(env, dst_reg);
13676 			return 0;
13677 		}
13678 
13679 		verbose(env,
13680 			"R%d 32-bit pointer arithmetic prohibited\n",
13681 			dst);
13682 		return -EACCES;
13683 	}
13684 
13685 	if (ptr_reg->type & PTR_MAYBE_NULL) {
13686 		verbose(env, "R%d pointer arithmetic on %s prohibited, null-check it first\n",
13687 			dst, reg_type_str(env, ptr_reg->type));
13688 		return -EACCES;
13689 	}
13690 
13691 	/*
13692 	 * Accesses to untrusted PTR_TO_MEM are done through probe
13693 	 * instructions, hence no need to track offsets.
13694 	 */
13695 	if (base_type(ptr_reg->type) == PTR_TO_MEM && (ptr_reg->type & PTR_UNTRUSTED))
13696 		return 0;
13697 
13698 	switch (base_type(ptr_reg->type)) {
13699 	case PTR_TO_CTX:
13700 	case PTR_TO_MAP_VALUE:
13701 	case PTR_TO_MAP_KEY:
13702 	case PTR_TO_STACK:
13703 	case PTR_TO_PACKET_META:
13704 	case PTR_TO_PACKET:
13705 	case PTR_TO_TP_BUFFER:
13706 	case PTR_TO_BTF_ID:
13707 	case PTR_TO_MEM:
13708 	case PTR_TO_BUF:
13709 	case PTR_TO_FUNC:
13710 	case CONST_PTR_TO_DYNPTR:
13711 		break;
13712 	case PTR_TO_FLOW_KEYS:
13713 		if (known)
13714 			break;
13715 		fallthrough;
13716 	case CONST_PTR_TO_MAP:
13717 		/* smin_val represents the known value */
13718 		if (known && smin_val == 0 && opcode == BPF_ADD)
13719 			break;
13720 		fallthrough;
13721 	default:
13722 		verbose(env, "R%d pointer arithmetic on %s prohibited\n",
13723 			dst, reg_type_str(env, ptr_reg->type));
13724 		return -EACCES;
13725 	}
13726 
13727 	/* In case of 'scalar += pointer', dst_reg inherits pointer type and id.
13728 	 * The id may be overwritten later if we create a new variable offset.
13729 	 */
13730 	dst_reg->type = ptr_reg->type;
13731 	dst_reg->id = ptr_reg->id;
13732 
13733 	if (!check_reg_sane_offset_scalar(env, off_reg, ptr_reg->type) ||
13734 	    !check_reg_sane_offset_ptr(env, ptr_reg, ptr_reg->type))
13735 		return -EINVAL;
13736 
13737 	/* pointer types do not carry 32-bit bounds at the moment. */
13738 	__mark_reg32_unbounded(dst_reg);
13739 
13740 	if (sanitize_needed(opcode)) {
13741 		ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg,
13742 				       &info, false);
13743 		if (ret < 0)
13744 			return sanitize_err(env, insn, ret, off_reg, dst_reg);
13745 	}
13746 
13747 	switch (opcode) {
13748 	case BPF_ADD:
13749 		/*
13750 		 * dst_reg gets the pointer type and since some positive
13751 		 * integer value was added to the pointer, give it a new 'id'
13752 		 * if it's a PTR_TO_PACKET.
13753 		 * this creates a new 'base' pointer, off_reg (variable) gets
13754 		 * added into the variable offset, and we copy the fixed offset
13755 		 * from ptr_reg.
13756 		 */
13757 		dst_reg->r64 = cnum64_add(ptr_reg->r64, off_reg->r64);
13758 		dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off);
13759 		dst_reg->raw = ptr_reg->raw;
13760 		if (reg_is_pkt_pointer(ptr_reg)) {
13761 			if (!known)
13762 				dst_reg->id = ++env->id_gen;
13763 			/*
13764 			 * Clear range for unknown addends since we can't know
13765 			 * where the pkt pointer ended up. Also clear AT_PKT_END /
13766 			 * BEYOND_PKT_END from prior comparison as any pointer
13767 			 * arithmetic invalidates them.
13768 			 */
13769 			if (!known || dst_reg->range < 0)
13770 				memset(&dst_reg->raw, 0, sizeof(dst_reg->raw));
13771 		}
13772 		break;
13773 	case BPF_SUB:
13774 		if (dst_reg == off_reg) {
13775 			/* scalar -= pointer.  Creates an unknown scalar */
13776 			verbose(env, "R%d tried to subtract pointer from scalar\n",
13777 				dst);
13778 			return -EACCES;
13779 		}
13780 		/* We don't allow subtraction from FP, because (according to
13781 		 * test_verifier.c test "invalid fp arithmetic", JITs might not
13782 		 * be able to deal with it.
13783 		 */
13784 		if (ptr_reg->type == PTR_TO_STACK) {
13785 			verbose(env, "R%d subtraction from stack pointer prohibited\n",
13786 				dst);
13787 			return -EACCES;
13788 		}
13789 		dst_reg->r64 = cnum64_add(ptr_reg->r64, cnum64_negate(off_reg->r64));
13790 		dst_reg->var_off = tnum_sub(ptr_reg->var_off, off_reg->var_off);
13791 		dst_reg->raw = ptr_reg->raw;
13792 		if (reg_is_pkt_pointer(ptr_reg)) {
13793 			if (!known)
13794 				dst_reg->id = ++env->id_gen;
13795 			/*
13796 			 * Clear range if the subtrahend may be negative since
13797 			 * pkt pointer could move past its bounds. A positive
13798 			 * subtrahend moves it backwards keeping positive range
13799 			 * intact. Also clear AT_PKT_END / BEYOND_PKT_END from
13800 			 * prior comparison as arithmetic invalidates them.
13801 			 */
13802 			if ((!known && smin_val < 0) || dst_reg->range < 0)
13803 				memset(&dst_reg->raw, 0, sizeof(dst_reg->raw));
13804 		}
13805 		break;
13806 	case BPF_AND:
13807 	case BPF_OR:
13808 	case BPF_XOR:
13809 		/* bitwise ops on pointers are troublesome, prohibit. */
13810 		verbose(env, "R%d bitwise operator %s on pointer prohibited\n",
13811 			dst, bpf_alu_string[opcode >> 4]);
13812 		return -EACCES;
13813 	default:
13814 		/* other operators (e.g. MUL,LSH) produce non-pointer results */
13815 		verbose(env, "R%d pointer arithmetic with %s operator prohibited\n",
13816 			dst, bpf_alu_string[opcode >> 4]);
13817 		return -EACCES;
13818 	}
13819 
13820 	if (!check_reg_sane_offset_ptr(env, dst_reg, ptr_reg->type))
13821 		return -EINVAL;
13822 	reg_bounds_sync(dst_reg);
13823 	bounds_ret = sanitize_check_bounds(env, insn, dst_reg);
13824 	if (bounds_ret == -EACCES)
13825 		return bounds_ret;
13826 	if (sanitize_needed(opcode)) {
13827 		ret = sanitize_ptr_alu(env, insn, dst_reg, off_reg, dst_reg,
13828 				       &info, true);
13829 		if (verifier_bug_if(!can_skip_alu_sanitation(env, insn)
13830 				    && !env->cur_state->speculative
13831 				    && bounds_ret
13832 				    && !ret,
13833 				    env, "Pointer type unsupported by sanitize_check_bounds() not rejected by retrieve_ptr_limit() as required")) {
13834 			return -EFAULT;
13835 		}
13836 		if (ret < 0)
13837 			return sanitize_err(env, insn, ret, off_reg, dst_reg);
13838 	}
13839 
13840 	return 0;
13841 }
13842 
13843 static void scalar32_min_max_add(struct bpf_reg_state *dst_reg,
13844 				 struct bpf_reg_state *src_reg)
13845 {
13846 	dst_reg->r32 = cnum32_add(dst_reg->r32, src_reg->r32);
13847 }
13848 
13849 static void scalar_min_max_add(struct bpf_reg_state *dst_reg,
13850 			       struct bpf_reg_state *src_reg)
13851 {
13852 	dst_reg->r64 = cnum64_add(dst_reg->r64, src_reg->r64);
13853 }
13854 
13855 static void scalar32_min_max_sub(struct bpf_reg_state *dst_reg,
13856 				 struct bpf_reg_state *src_reg)
13857 {
13858 	dst_reg->r32 = cnum32_add(dst_reg->r32, cnum32_negate(src_reg->r32));
13859 }
13860 
13861 static void scalar_min_max_sub(struct bpf_reg_state *dst_reg,
13862 			       struct bpf_reg_state *src_reg)
13863 {
13864 	dst_reg->r64 = cnum64_add(dst_reg->r64, cnum64_negate(src_reg->r64));
13865 }
13866 
13867 static void scalar32_min_max_mul(struct bpf_reg_state *dst_reg,
13868 				 struct bpf_reg_state *src_reg)
13869 {
13870 	s32 smin = reg_s32_min(dst_reg);
13871 	s32 smax = reg_s32_max(dst_reg);
13872 	u32 umin = reg_u32_min(dst_reg);
13873 	u32 umax = reg_u32_max(dst_reg);
13874 	s32 tmp_prod[4];
13875 
13876 	if (check_mul_overflow(umax, reg_u32_max(src_reg), &umax) ||
13877 	    check_mul_overflow(umin, reg_u32_min(src_reg), &umin)) {
13878 		/* Overflow possible, we know nothing */
13879 		umin = 0;
13880 		umax = U32_MAX;
13881 	}
13882 	if (check_mul_overflow(smin, reg_s32_min(src_reg), &tmp_prod[0]) ||
13883 	    check_mul_overflow(smin, reg_s32_max(src_reg), &tmp_prod[1]) ||
13884 	    check_mul_overflow(smax, reg_s32_min(src_reg), &tmp_prod[2]) ||
13885 	    check_mul_overflow(smax, reg_s32_max(src_reg), &tmp_prod[3])) {
13886 		/* Overflow possible, we know nothing */
13887 		smin = S32_MIN;
13888 		smax = S32_MAX;
13889 	} else {
13890 		smin = min_array(tmp_prod, 4);
13891 		smax = max_array(tmp_prod, 4);
13892 	}
13893 
13894 	dst_reg->r32 = cnum32_intersect(cnum32_from_urange(umin, umax),
13895 					cnum32_from_srange(smin, smax));
13896 }
13897 
13898 static void scalar_min_max_mul(struct bpf_reg_state *dst_reg,
13899 			       struct bpf_reg_state *src_reg)
13900 {
13901 	s64 smin = reg_smin(dst_reg);
13902 	s64 smax = reg_smax(dst_reg);
13903 	u64 umin = reg_umin(dst_reg);
13904 	u64 umax = reg_umax(dst_reg);
13905 	s64 tmp_prod[4];
13906 
13907 	if (check_mul_overflow(umax, reg_umax(src_reg), &umax) ||
13908 	    check_mul_overflow(umin, reg_umin(src_reg), &umin)) {
13909 		/* Overflow possible, we know nothing */
13910 		umin = 0;
13911 		umax = U64_MAX;
13912 	}
13913 	if (check_mul_overflow(smin, reg_smin(src_reg), &tmp_prod[0]) ||
13914 	    check_mul_overflow(smin, reg_smax(src_reg), &tmp_prod[1]) ||
13915 	    check_mul_overflow(smax, reg_smin(src_reg), &tmp_prod[2]) ||
13916 	    check_mul_overflow(smax, reg_smax(src_reg), &tmp_prod[3])) {
13917 		/* Overflow possible, we know nothing */
13918 		smin = S64_MIN;
13919 		smax = S64_MAX;
13920 	} else {
13921 		smin = min_array(tmp_prod, 4);
13922 		smax = max_array(tmp_prod, 4);
13923 	}
13924 
13925 	dst_reg->r64 = cnum64_intersect(cnum64_from_urange(umin, umax),
13926 					cnum64_from_srange(smin, smax));
13927 }
13928 
13929 static void scalar32_min_max_udiv(struct bpf_reg_state *dst_reg,
13930 				  struct bpf_reg_state *src_reg)
13931 {
13932 	u32 src_val = reg_u32_min(src_reg); /* non-zero, const divisor */
13933 
13934 	reg_set_urange32(dst_reg, reg_u32_min(dst_reg) / src_val,
13935 			 reg_u32_max(dst_reg) / src_val);
13936 
13937 	/* Reset other ranges/tnum to unbounded/unknown. */
13938 	reset_reg64_and_tnum(dst_reg);
13939 }
13940 
13941 static void scalar_min_max_udiv(struct bpf_reg_state *dst_reg,
13942 				struct bpf_reg_state *src_reg)
13943 {
13944 	u64 src_val = reg_umin(src_reg); /* non-zero, const divisor */
13945 
13946 	reg_set_urange64(dst_reg, div64_u64(reg_umin(dst_reg), src_val),
13947 			 div64_u64(reg_umax(dst_reg), src_val));
13948 
13949 	/* Reset other ranges/tnum to unbounded/unknown. */
13950 	reset_reg32_and_tnum(dst_reg);
13951 }
13952 
13953 static void scalar32_min_max_sdiv(struct bpf_reg_state *dst_reg,
13954 				  struct bpf_reg_state *src_reg)
13955 {
13956 	s32 smin = reg_s32_min(dst_reg);
13957 	s32 smax = reg_s32_max(dst_reg);
13958 	s32 src_val = reg_s32_min(src_reg); /* non-zero, const divisor */
13959 	s32 res1, res2;
13960 
13961 	/* BPF div specification: S32_MIN / -1 = S32_MIN */
13962 	if (smin == S32_MIN && src_val == -1) {
13963 		/*
13964 		 * If the dividend range contains more than just S32_MIN,
13965 		 * we cannot precisely track the result, so it becomes unbounded.
13966 		 * e.g., [S32_MIN, S32_MIN+10]/(-1),
13967 		 *     = {S32_MIN} U [-(S32_MIN+10), -(S32_MIN+1)]
13968 		 *     = {S32_MIN} U [S32_MAX-9, S32_MAX] = [S32_MIN, S32_MAX]
13969 		 * Otherwise (if dividend is exactly S32_MIN), result remains S32_MIN.
13970 		 */
13971 		if (smax != S32_MIN) {
13972 			smin = S32_MIN;
13973 			smax = S32_MAX;
13974 		}
13975 		goto reset;
13976 	}
13977 
13978 	res1 = smin / src_val;
13979 	res2 = smax / src_val;
13980 	smin = min(res1, res2);
13981 	smax = max(res1, res2);
13982 
13983 reset:
13984 	reg_set_srange32(dst_reg, smin, smax);
13985 	/* Reset other ranges/tnum to unbounded/unknown. */
13986 	reset_reg64_and_tnum(dst_reg);
13987 }
13988 
13989 static void scalar_min_max_sdiv(struct bpf_reg_state *dst_reg,
13990 				struct bpf_reg_state *src_reg)
13991 {
13992 	s64 smin = reg_smin(dst_reg);
13993 	s64 smax = reg_smax(dst_reg);
13994 	s64 src_val = reg_smin(src_reg); /* non-zero, const divisor */
13995 	s64 res1, res2;
13996 
13997 	/* BPF div specification: S64_MIN / -1 = S64_MIN */
13998 	if (smin == S64_MIN && src_val == -1) {
13999 		/*
14000 		 * If the dividend range contains more than just S64_MIN,
14001 		 * we cannot precisely track the result, so it becomes unbounded.
14002 		 * e.g., [S64_MIN, S64_MIN+10]/(-1),
14003 		 *     = {S64_MIN} U [-(S64_MIN+10), -(S64_MIN+1)]
14004 		 *     = {S64_MIN} U [S64_MAX-9, S64_MAX] = [S64_MIN, S64_MAX]
14005 		 * Otherwise (if dividend is exactly S64_MIN), result remains S64_MIN.
14006 		 */
14007 		if (smax != S64_MIN) {
14008 			smin = S64_MIN;
14009 			smax = S64_MAX;
14010 		}
14011 		goto reset;
14012 	}
14013 
14014 	res1 = div64_s64(smin, src_val);
14015 	res2 = div64_s64(smax, src_val);
14016 	smin = min(res1, res2);
14017 	smax = max(res1, res2);
14018 
14019 reset:
14020 	reg_set_srange64(dst_reg, smin, smax);
14021 	/* Reset other ranges/tnum to unbounded/unknown. */
14022 	reset_reg32_and_tnum(dst_reg);
14023 }
14024 
14025 static void scalar32_min_max_umod(struct bpf_reg_state *dst_reg,
14026 				  struct bpf_reg_state *src_reg)
14027 {
14028 	u32 src_val = reg_u32_min(src_reg); /* non-zero, const divisor */
14029 	u32 res_max = src_val - 1;
14030 
14031 	/*
14032 	 * If dst_umax <= res_max, the result remains unchanged.
14033 	 * e.g., [2, 5] % 10 = [2, 5].
14034 	 */
14035 	if (reg_u32_max(dst_reg) <= res_max)
14036 		return;
14037 
14038 	reg_set_urange32(dst_reg, 0, min(reg_u32_max(dst_reg), res_max));
14039 
14040 	/* Reset other ranges/tnum to unbounded/unknown. */
14041 	reset_reg64_and_tnum(dst_reg);
14042 }
14043 
14044 static void scalar_min_max_umod(struct bpf_reg_state *dst_reg,
14045 				struct bpf_reg_state *src_reg)
14046 {
14047 	u64 src_val = reg_umin(src_reg); /* non-zero, const divisor */
14048 	u64 res_max = src_val - 1;
14049 
14050 	/*
14051 	 * If dst_umax <= res_max, the result remains unchanged.
14052 	 * e.g., [2, 5] % 10 = [2, 5].
14053 	 */
14054 	if (reg_umax(dst_reg) <= res_max)
14055 		return;
14056 
14057 	reg_set_urange64(dst_reg, 0, min(reg_umax(dst_reg), res_max));
14058 
14059 	/* Reset other ranges/tnum to unbounded/unknown. */
14060 	reset_reg32_and_tnum(dst_reg);
14061 }
14062 
14063 static void scalar32_min_max_smod(struct bpf_reg_state *dst_reg,
14064 				  struct bpf_reg_state *src_reg)
14065 {
14066 	s32 src_val = reg_s32_min(src_reg); /* non-zero, const divisor */
14067 
14068 	/*
14069 	 * Safe absolute value calculation:
14070 	 * If src_val == S32_MIN (-2147483648), src_abs becomes 2147483648.
14071 	 * Here use unsigned integer to avoid overflow.
14072 	 */
14073 	u32 src_abs = (src_val > 0) ? (u32)src_val : -(u32)src_val;
14074 
14075 	/*
14076 	 * Calculate the maximum possible absolute value of the result.
14077 	 * Even if src_abs is 2147483648 (S32_MIN), subtracting 1 gives
14078 	 * 2147483647 (S32_MAX), which fits perfectly in s32.
14079 	 */
14080 	s32 res_max_abs = src_abs - 1;
14081 
14082 	/*
14083 	 * If the dividend is already within the result range,
14084 	 * the result remains unchanged. e.g., [-2, 5] % 10 = [-2, 5].
14085 	 */
14086 	if (reg_s32_min(dst_reg) >= -res_max_abs && reg_s32_max(dst_reg) <= res_max_abs)
14087 		return;
14088 
14089 	/* General case: result has the same sign as the dividend. */
14090 	if (reg_s32_min(dst_reg) >= 0) {
14091 		reg_set_srange32(dst_reg, 0, min(reg_s32_max(dst_reg), res_max_abs));
14092 	} else if (reg_s32_max(dst_reg) <= 0) {
14093 		reg_set_srange32(dst_reg, max(reg_s32_min(dst_reg), -res_max_abs), 0);
14094 	} else {
14095 		reg_set_srange32(dst_reg, -res_max_abs, res_max_abs);
14096 	}
14097 
14098 	/* Reset other ranges/tnum to unbounded/unknown. */
14099 	reset_reg64_and_tnum(dst_reg);
14100 }
14101 
14102 static void scalar_min_max_smod(struct bpf_reg_state *dst_reg,
14103 				struct bpf_reg_state *src_reg)
14104 {
14105 	s64 src_val = reg_smin(src_reg); /* non-zero, const divisor */
14106 
14107 	/*
14108 	 * Safe absolute value calculation:
14109 	 * If src_val == S64_MIN (-2^63), src_abs becomes 2^63.
14110 	 * Here use unsigned integer to avoid overflow.
14111 	 */
14112 	u64 src_abs = (src_val > 0) ? (u64)src_val : -(u64)src_val;
14113 
14114 	/*
14115 	 * Calculate the maximum possible absolute value of the result.
14116 	 * Even if src_abs is 2^63 (S64_MIN), subtracting 1 gives
14117 	 * 2^63 - 1 (S64_MAX), which fits perfectly in s64.
14118 	 */
14119 	s64 res_max_abs = src_abs - 1;
14120 
14121 	/*
14122 	 * If the dividend is already within the result range,
14123 	 * the result remains unchanged. e.g., [-2, 5] % 10 = [-2, 5].
14124 	 */
14125 	if (reg_smin(dst_reg) >= -res_max_abs && reg_smax(dst_reg) <= res_max_abs)
14126 		return;
14127 
14128 	/* General case: result has the same sign as the dividend. */
14129 	if (reg_smin(dst_reg) >= 0) {
14130 		reg_set_srange64(dst_reg, 0, min(reg_smax(dst_reg), res_max_abs));
14131 	} else if (reg_smax(dst_reg) <= 0) {
14132 		reg_set_srange64(dst_reg, max(reg_smin(dst_reg), -res_max_abs), 0);
14133 	} else {
14134 		reg_set_srange64(dst_reg, -res_max_abs, res_max_abs);
14135 	}
14136 
14137 	/* Reset other ranges/tnum to unbounded/unknown. */
14138 	reset_reg32_and_tnum(dst_reg);
14139 }
14140 
14141 static void scalar32_min_max_and(struct bpf_reg_state *dst_reg,
14142 				 struct bpf_reg_state *src_reg)
14143 {
14144 	bool src_known = tnum_subreg_is_const(src_reg->var_off);
14145 	bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
14146 	struct tnum var32_off = tnum_subreg(dst_reg->var_off);
14147 	u32 umax_val = reg_u32_max(src_reg);
14148 
14149 	if (src_known && dst_known) {
14150 		__mark_reg32_known(dst_reg, var32_off.value);
14151 		return;
14152 	}
14153 
14154 	/* We get our minimum from the var_off, since that's inherently
14155 	 * bitwise.  Our maximum is the minimum of the operands' maxima.
14156 	 */
14157 	reg_set_urange32(dst_reg,
14158 			 var32_off.value,
14159 			 min(reg_u32_max(dst_reg), umax_val));
14160 }
14161 
14162 static void scalar_min_max_and(struct bpf_reg_state *dst_reg,
14163 			       struct bpf_reg_state *src_reg)
14164 {
14165 	bool src_known = tnum_is_const(src_reg->var_off);
14166 	bool dst_known = tnum_is_const(dst_reg->var_off);
14167 	u64 umax_val = reg_umax(src_reg);
14168 
14169 	if (src_known && dst_known) {
14170 		__mark_reg_known(dst_reg, dst_reg->var_off.value);
14171 		return;
14172 	}
14173 
14174 	/* We get our minimum from the var_off, since that's inherently
14175 	 * bitwise.  Our maximum is the minimum of the operands' maxima.
14176 	 */
14177 	reg_set_urange64(dst_reg,
14178 			 dst_reg->var_off.value,
14179 			 min(reg_umax(dst_reg), umax_val));
14180 
14181 	/* We may learn something more from the var_off */
14182 	__update_reg_bounds(dst_reg);
14183 }
14184 
14185 static void scalar32_min_max_or(struct bpf_reg_state *dst_reg,
14186 				struct bpf_reg_state *src_reg)
14187 {
14188 	bool src_known = tnum_subreg_is_const(src_reg->var_off);
14189 	bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
14190 	struct tnum var32_off = tnum_subreg(dst_reg->var_off);
14191 	u32 umin_val = reg_u32_min(src_reg);
14192 
14193 	if (src_known && dst_known) {
14194 		__mark_reg32_known(dst_reg, var32_off.value);
14195 		return;
14196 	}
14197 
14198 	/* We get our maximum from the var_off, and our minimum is the
14199 	 * maximum of the operands' minima
14200 	 */
14201 	reg_set_urange32(dst_reg,
14202 			 max(reg_u32_min(dst_reg), umin_val),
14203 			 var32_off.value | var32_off.mask);
14204 }
14205 
14206 static void scalar_min_max_or(struct bpf_reg_state *dst_reg,
14207 			      struct bpf_reg_state *src_reg)
14208 {
14209 	bool src_known = tnum_is_const(src_reg->var_off);
14210 	bool dst_known = tnum_is_const(dst_reg->var_off);
14211 	u64 umin_val = reg_umin(src_reg);
14212 
14213 	if (src_known && dst_known) {
14214 		__mark_reg_known(dst_reg, dst_reg->var_off.value);
14215 		return;
14216 	}
14217 
14218 	/* We get our maximum from the var_off, and our minimum is the
14219 	 * maximum of the operands' minima
14220 	 */
14221 	reg_set_urange64(dst_reg,
14222 			 max(reg_umin(dst_reg), umin_val),
14223 			 dst_reg->var_off.value | dst_reg->var_off.mask);
14224 
14225 	/* We may learn something more from the var_off */
14226 	__update_reg_bounds(dst_reg);
14227 }
14228 
14229 static void scalar32_min_max_xor(struct bpf_reg_state *dst_reg,
14230 				 struct bpf_reg_state *src_reg)
14231 {
14232 	bool src_known = tnum_subreg_is_const(src_reg->var_off);
14233 	bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
14234 	struct tnum var32_off = tnum_subreg(dst_reg->var_off);
14235 
14236 	if (src_known && dst_known) {
14237 		__mark_reg32_known(dst_reg, var32_off.value);
14238 		return;
14239 	}
14240 
14241 	/* We get both minimum and maximum from the var32_off. */
14242 	reg_set_urange32(dst_reg, var32_off.value, var32_off.value | var32_off.mask);
14243 }
14244 
14245 static void scalar_min_max_xor(struct bpf_reg_state *dst_reg,
14246 			       struct bpf_reg_state *src_reg)
14247 {
14248 	bool src_known = tnum_is_const(src_reg->var_off);
14249 	bool dst_known = tnum_is_const(dst_reg->var_off);
14250 
14251 	if (src_known && dst_known) {
14252 		/* dst_reg->var_off.value has been updated earlier */
14253 		__mark_reg_known(dst_reg, dst_reg->var_off.value);
14254 		return;
14255 	}
14256 
14257 	/* We get both minimum and maximum from the var_off. */
14258 	reg_set_urange64(dst_reg,
14259 			 dst_reg->var_off.value,
14260 			 dst_reg->var_off.value | dst_reg->var_off.mask);
14261 }
14262 
14263 static void __scalar32_min_max_lsh(struct bpf_reg_state *dst_reg,
14264 				   u64 umin_val, u64 umax_val)
14265 {
14266 	/* If we might shift our top bit out, then we know nothing */
14267 	if (umax_val > 31 || reg_u32_max(dst_reg) > 1ULL << (31 - umax_val))
14268 		reg_set_urange32(dst_reg, 0, U32_MAX);
14269 	else
14270 		/* We lose all sign bit information (except what we can pick
14271 		 * up from var_off)
14272 		 */
14273 		reg_set_urange32(dst_reg, reg_u32_min(dst_reg) << umin_val,
14274 				 reg_u32_max(dst_reg) << umax_val);
14275 }
14276 
14277 static void scalar32_min_max_lsh(struct bpf_reg_state *dst_reg,
14278 				 struct bpf_reg_state *src_reg)
14279 {
14280 	u32 umax_val = reg_u32_max(src_reg);
14281 	u32 umin_val = reg_u32_min(src_reg);
14282 	/* u32 alu operation will zext upper bits */
14283 	struct tnum subreg = tnum_subreg(dst_reg->var_off);
14284 
14285 	__scalar32_min_max_lsh(dst_reg, umin_val, umax_val);
14286 	dst_reg->var_off = tnum_subreg(tnum_lshift(subreg, umin_val));
14287 	/* Not required but being careful mark reg64 bounds as unknown so
14288 	 * that we are forced to pick them up from tnum and zext later and
14289 	 * if some path skips this step we are still safe.
14290 	 */
14291 	__mark_reg64_unbounded(dst_reg);
14292 	__update_reg32_bounds(dst_reg);
14293 }
14294 
14295 static void __scalar64_min_max_lsh(struct bpf_reg_state *dst_reg,
14296 				   u64 umin_val, u64 umax_val)
14297 {
14298 	struct cnum64 u, s;
14299 
14300 	/* Special case <<32 because it is a common compiler pattern to sign
14301 	 * extend subreg by doing <<32 s>>32. smin/smax assignments are correct
14302 	 * because s32 bounds don't flip sign when shifting to the left by
14303 	 * 32bits.
14304 	 */
14305 	if (umin_val == 32 && umax_val == 32)
14306 		s = cnum64_from_srange((s64)reg_s32_min(dst_reg) << 32,
14307 				       (s64)reg_s32_max(dst_reg) << 32);
14308 	else
14309 		s = CNUM64_UNBOUNDED;
14310 
14311 	/* If we might shift our top bit out, then we know nothing */
14312 	if (reg_umax(dst_reg) > 1ULL << (63 - umax_val))
14313 		u = CNUM64_UNBOUNDED;
14314 	else
14315 		u = cnum64_from_urange(reg_umin(dst_reg) << umin_val,
14316 				       reg_umax(dst_reg) << umax_val);
14317 
14318 	dst_reg->r64 = cnum64_intersect(u, s);
14319 }
14320 
14321 static void scalar_min_max_lsh(struct bpf_reg_state *dst_reg,
14322 			       struct bpf_reg_state *src_reg)
14323 {
14324 	u64 umax_val = reg_umax(src_reg);
14325 	u64 umin_val = reg_umin(src_reg);
14326 
14327 	/* scalar64 calc uses 32bit unshifted bounds so must be called first */
14328 	__scalar64_min_max_lsh(dst_reg, umin_val, umax_val);
14329 	__scalar32_min_max_lsh(dst_reg, umin_val, umax_val);
14330 
14331 	dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val);
14332 	/* We may learn something more from the var_off */
14333 	__update_reg_bounds(dst_reg);
14334 }
14335 
14336 static void scalar32_min_max_rsh(struct bpf_reg_state *dst_reg,
14337 				 struct bpf_reg_state *src_reg)
14338 {
14339 	struct tnum subreg = tnum_subreg(dst_reg->var_off);
14340 	u32 umax_val = reg_u32_max(src_reg);
14341 	u32 umin_val = reg_u32_min(src_reg);
14342 
14343 	/* BPF_RSH is an unsigned shift.  If the value in dst_reg might
14344 	 * be negative, then either:
14345 	 * 1) src_reg might be zero, so the sign bit of the result is
14346 	 *    unknown, so we lose our signed bounds
14347 	 * 2) it's known negative, thus the unsigned bounds capture the
14348 	 *    signed bounds
14349 	 * 3) the signed bounds cross zero, so they tell us nothing
14350 	 *    about the result
14351 	 * If the value in dst_reg is known nonnegative, then again the
14352 	 * unsigned bounds capture the signed bounds.
14353 	 * Thus, in all cases it suffices to blow away our signed bounds
14354 	 * and rely on inferring new ones from the unsigned bounds and
14355 	 * var_off of the result.
14356 	 */
14357 
14358 	dst_reg->var_off = tnum_rshift(subreg, umin_val);
14359 	reg_set_urange32(dst_reg, reg_u32_min(dst_reg) >> umax_val,
14360 			 reg_u32_max(dst_reg) >> umin_val);
14361 
14362 	__mark_reg64_unbounded(dst_reg);
14363 	__update_reg32_bounds(dst_reg);
14364 }
14365 
14366 static void scalar_min_max_rsh(struct bpf_reg_state *dst_reg,
14367 			       struct bpf_reg_state *src_reg)
14368 {
14369 	u64 umax_val = reg_umax(src_reg);
14370 	u64 umin_val = reg_umin(src_reg);
14371 
14372 	/* BPF_RSH is an unsigned shift.  If the value in dst_reg might
14373 	 * be negative, then either:
14374 	 * 1) src_reg might be zero, so the sign bit of the result is
14375 	 *    unknown, so we lose our signed bounds
14376 	 * 2) it's known negative, thus the unsigned bounds capture the
14377 	 *    signed bounds
14378 	 * 3) the signed bounds cross zero, so they tell us nothing
14379 	 *    about the result
14380 	 * If the value in dst_reg is known nonnegative, then again the
14381 	 * unsigned bounds capture the signed bounds.
14382 	 * Thus, in all cases it suffices to blow away our signed bounds
14383 	 * and rely on inferring new ones from the unsigned bounds and
14384 	 * var_off of the result.
14385 	 */
14386 	dst_reg->var_off = tnum_rshift(dst_reg->var_off, umin_val);
14387 	reg_set_urange64(dst_reg, reg_umin(dst_reg) >> umax_val,
14388 			 reg_umax(dst_reg) >> umin_val);
14389 
14390 	/* Its not easy to operate on alu32 bounds here because it depends
14391 	 * on bits being shifted in. Take easy way out and mark unbounded
14392 	 * so we can recalculate later from tnum.
14393 	 */
14394 	__mark_reg32_unbounded(dst_reg);
14395 	__update_reg_bounds(dst_reg);
14396 }
14397 
14398 static void scalar32_min_max_arsh(struct bpf_reg_state *dst_reg,
14399 				  struct bpf_reg_state *src_reg)
14400 {
14401 	u64 umin_val = reg_u32_min(src_reg);
14402 
14403 	/* Upon reaching here, src_known is true and
14404 	 * umax_val is equal to umin_val.
14405 	 * Blow away the dst_reg umin_value/umax_value and rely on
14406 	 * dst_reg var_off to refine the result.
14407 	 */
14408 	reg_set_srange32(dst_reg,
14409 			 (u32)(((s32)reg_s32_min(dst_reg)) >> umin_val),
14410 			 (u32)(((s32)reg_s32_max(dst_reg)) >> umin_val));
14411 
14412 	dst_reg->var_off = tnum_arshift(tnum_subreg(dst_reg->var_off), umin_val, 32);
14413 
14414 	__mark_reg64_unbounded(dst_reg);
14415 	__update_reg32_bounds(dst_reg);
14416 }
14417 
14418 static void scalar_min_max_arsh(struct bpf_reg_state *dst_reg,
14419 				struct bpf_reg_state *src_reg)
14420 {
14421 	u64 umin_val = reg_umin(src_reg);
14422 
14423 	/* Upon reaching here, src_known is true and umax_val is equal
14424 	 * to umin_val.
14425 	 */
14426 	reg_set_srange64(dst_reg, reg_smin(dst_reg) >> umin_val,
14427 			 reg_smax(dst_reg) >> umin_val);
14428 
14429 	dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_val, 64);
14430 
14431 	/* Its not easy to operate on alu32 bounds here because it depends
14432 	 * on bits being shifted in from upper 32-bits. Take easy way out
14433 	 * and mark unbounded so we can recalculate later from tnum.
14434 	 */
14435 	__mark_reg32_unbounded(dst_reg);
14436 	__update_reg_bounds(dst_reg);
14437 }
14438 
14439 static void scalar_byte_swap(struct bpf_reg_state *dst_reg, struct bpf_insn *insn)
14440 {
14441 	/*
14442 	 * Byte swap operation - update var_off using tnum_bswap.
14443 	 * Three cases:
14444 	 * 1. bswap(16|32|64): opcode=0xd7 (BPF_END | BPF_ALU64 | BPF_TO_LE)
14445 	 *    unconditional swap
14446 	 * 2. to_le(16|32|64): opcode=0xd4 (BPF_END | BPF_ALU | BPF_TO_LE)
14447 	 *    swap on big-endian, truncation or no-op on little-endian
14448 	 * 3. to_be(16|32|64): opcode=0xdc (BPF_END | BPF_ALU | BPF_TO_BE)
14449 	 *    swap on little-endian, truncation or no-op on big-endian
14450 	 */
14451 
14452 	bool alu64 = BPF_CLASS(insn->code) == BPF_ALU64;
14453 	bool to_le = BPF_SRC(insn->code) == BPF_TO_LE;
14454 	bool is_big_endian;
14455 #ifdef CONFIG_CPU_BIG_ENDIAN
14456 	is_big_endian = true;
14457 #else
14458 	is_big_endian = false;
14459 #endif
14460 	/* Apply bswap if alu64 or switch between big-endian and little-endian machines */
14461 	bool need_bswap = alu64 || (to_le == is_big_endian);
14462 
14463 	/*
14464 	 * If the register is mutated, manually reset its scalar ID to break
14465 	 * any existing ties and avoid incorrect bounds propagation.
14466 	 */
14467 	if (need_bswap || insn->imm == 16 || insn->imm == 32)
14468 		clear_scalar_id(dst_reg);
14469 
14470 	if (need_bswap) {
14471 		if (insn->imm == 16)
14472 			dst_reg->var_off = tnum_bswap16(dst_reg->var_off);
14473 		else if (insn->imm == 32)
14474 			dst_reg->var_off = tnum_bswap32(dst_reg->var_off);
14475 		else if (insn->imm == 64)
14476 			dst_reg->var_off = tnum_bswap64(dst_reg->var_off);
14477 		/*
14478 		 * Byteswap scrambles the range, so we must reset bounds.
14479 		 * Bounds will be re-derived from the new tnum later.
14480 		 */
14481 		__mark_reg_unbounded(dst_reg);
14482 	}
14483 	/* For bswap16/32, truncate dst register to match the swapped size */
14484 	if (insn->imm == 16 || insn->imm == 32)
14485 		coerce_reg_to_size(dst_reg, insn->imm / 8);
14486 }
14487 
14488 static bool is_safe_to_compute_dst_reg_range(struct bpf_insn *insn,
14489 					     const struct bpf_reg_state *src_reg)
14490 {
14491 	bool src_is_const = false;
14492 	u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;
14493 
14494 	if (insn_bitness == 32) {
14495 		if (tnum_subreg_is_const(src_reg->var_off)
14496 		    && reg_s32_min(src_reg) == reg_s32_max(src_reg)
14497 		    && reg_u32_min(src_reg) == reg_u32_max(src_reg))
14498 			src_is_const = true;
14499 	} else {
14500 		if (tnum_is_const(src_reg->var_off)
14501 		    && reg_smin(src_reg) == reg_smax(src_reg)
14502 		    && reg_umin(src_reg) == reg_umax(src_reg))
14503 			src_is_const = true;
14504 	}
14505 
14506 	switch (BPF_OP(insn->code)) {
14507 	case BPF_ADD:
14508 	case BPF_SUB:
14509 	case BPF_NEG:
14510 	case BPF_AND:
14511 	case BPF_XOR:
14512 	case BPF_OR:
14513 	case BPF_MUL:
14514 	case BPF_END:
14515 		return true;
14516 
14517 	/*
14518 	 * Division and modulo operators range is only safe to compute when the
14519 	 * divisor is a constant.
14520 	 */
14521 	case BPF_DIV:
14522 	case BPF_MOD:
14523 		return src_is_const;
14524 
14525 	/* Shift operators range is only computable if shift dimension operand
14526 	 * is a constant. Shifts greater than 31 or 63 are undefined. This
14527 	 * includes shifts by a negative number.
14528 	 */
14529 	case BPF_LSH:
14530 	case BPF_RSH:
14531 	case BPF_ARSH:
14532 		return (src_is_const && reg_umax(src_reg) < insn_bitness);
14533 	default:
14534 		return false;
14535 	}
14536 }
14537 
14538 static int maybe_fork_scalars(struct bpf_verifier_env *env, struct bpf_insn *insn,
14539 			      struct bpf_reg_state *dst_reg)
14540 {
14541 	struct bpf_verifier_state *branch;
14542 	struct bpf_reg_state *regs;
14543 	bool alu32;
14544 
14545 	if (reg_smin(dst_reg) == -1 && reg_smax(dst_reg) == 0)
14546 		alu32 = false;
14547 	else if (reg_s32_min(dst_reg) == -1 && reg_s32_max(dst_reg) == 0)
14548 		alu32 = true;
14549 	else
14550 		return 0;
14551 
14552 	branch = push_stack(env, env->insn_idx, env->insn_idx, false);
14553 	if (IS_ERR(branch))
14554 		return PTR_ERR(branch);
14555 
14556 	regs = branch->frame[branch->curframe]->regs;
14557 	if (alu32) {
14558 		__mark_reg32_known(&regs[insn->dst_reg], 0);
14559 		__mark_reg32_known(dst_reg, -1ull);
14560 	} else {
14561 		__mark_reg_known(&regs[insn->dst_reg], 0);
14562 		__mark_reg_known(dst_reg, -1ull);
14563 	}
14564 	return 0;
14565 }
14566 
14567 /* WARNING: This function does calculations on 64-bit values, but the actual
14568  * execution may occur on 32-bit values. Therefore, things like bitshifts
14569  * need extra checks in the 32-bit case.
14570  */
14571 static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
14572 				      struct bpf_insn *insn,
14573 				      struct bpf_reg_state *dst_reg,
14574 				      struct bpf_reg_state src_reg)
14575 {
14576 	u8 opcode = BPF_OP(insn->code);
14577 	s16 off = insn->off;
14578 	bool alu32 = (BPF_CLASS(insn->code) != BPF_ALU64);
14579 	int ret;
14580 
14581 	if (!is_safe_to_compute_dst_reg_range(insn, &src_reg)) {
14582 		__mark_reg_unknown(env, dst_reg);
14583 		return 0;
14584 	}
14585 
14586 	if (sanitize_needed(opcode)) {
14587 		ret = sanitize_val_alu(env, insn);
14588 		if (ret < 0)
14589 			return sanitize_err(env, insn, ret, NULL, NULL);
14590 	}
14591 
14592 	/* Calculate sign/unsigned bounds and tnum for alu32 and alu64 bit ops.
14593 	 * There are two classes of instructions: The first class we track both
14594 	 * alu32 and alu64 sign/unsigned bounds independently this provides the
14595 	 * greatest amount of precision when alu operations are mixed with jmp32
14596 	 * operations. These operations are BPF_ADD, BPF_SUB, BPF_MUL, BPF_ADD,
14597 	 * and BPF_OR. This is possible because these ops have fairly easy to
14598 	 * understand and calculate behavior in both 32-bit and 64-bit alu ops.
14599 	 * See alu32 verifier tests for examples. The second class of
14600 	 * operations, BPF_LSH, BPF_RSH, and BPF_ARSH, however are not so easy
14601 	 * with regards to tracking sign/unsigned bounds because the bits may
14602 	 * cross subreg boundaries in the alu64 case. When this happens we mark
14603 	 * the reg unbounded in the subreg bound space and use the resulting
14604 	 * tnum to calculate an approximation of the sign/unsigned bounds.
14605 	 */
14606 	switch (opcode) {
14607 	case BPF_ADD:
14608 		scalar32_min_max_add(dst_reg, &src_reg);
14609 		scalar_min_max_add(dst_reg, &src_reg);
14610 		dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off);
14611 		break;
14612 	case BPF_SUB:
14613 		scalar32_min_max_sub(dst_reg, &src_reg);
14614 		scalar_min_max_sub(dst_reg, &src_reg);
14615 		dst_reg->var_off = tnum_sub(dst_reg->var_off, src_reg.var_off);
14616 		break;
14617 	case BPF_NEG:
14618 		env->fake_reg[0] = *dst_reg;
14619 		__mark_reg_known(dst_reg, 0);
14620 		scalar32_min_max_sub(dst_reg, &env->fake_reg[0]);
14621 		scalar_min_max_sub(dst_reg, &env->fake_reg[0]);
14622 		dst_reg->var_off = tnum_neg(env->fake_reg[0].var_off);
14623 		break;
14624 	case BPF_MUL:
14625 		dst_reg->var_off = tnum_mul(dst_reg->var_off, src_reg.var_off);
14626 		scalar32_min_max_mul(dst_reg, &src_reg);
14627 		scalar_min_max_mul(dst_reg, &src_reg);
14628 		break;
14629 	case BPF_DIV:
14630 		/* BPF div specification: x / 0 = 0 */
14631 		if ((alu32 && reg_u32_min(&src_reg) == 0) || (!alu32 && reg_umin(&src_reg) == 0)) {
14632 			___mark_reg_known(dst_reg, 0);
14633 			break;
14634 		}
14635 		if (alu32)
14636 			if (off == 1)
14637 				scalar32_min_max_sdiv(dst_reg, &src_reg);
14638 			else
14639 				scalar32_min_max_udiv(dst_reg, &src_reg);
14640 		else
14641 			if (off == 1)
14642 				scalar_min_max_sdiv(dst_reg, &src_reg);
14643 			else
14644 				scalar_min_max_udiv(dst_reg, &src_reg);
14645 		break;
14646 	case BPF_MOD:
14647 		/* BPF mod specification: x % 0 = x */
14648 		if ((alu32 && reg_u32_min(&src_reg) == 0) || (!alu32 && reg_umin(&src_reg) == 0))
14649 			break;
14650 		if (alu32)
14651 			if (off == 1)
14652 				scalar32_min_max_smod(dst_reg, &src_reg);
14653 			else
14654 				scalar32_min_max_umod(dst_reg, &src_reg);
14655 		else
14656 			if (off == 1)
14657 				scalar_min_max_smod(dst_reg, &src_reg);
14658 			else
14659 				scalar_min_max_umod(dst_reg, &src_reg);
14660 		break;
14661 	case BPF_AND:
14662 		if (tnum_is_const(src_reg.var_off)) {
14663 			ret = maybe_fork_scalars(env, insn, dst_reg);
14664 			if (ret)
14665 				return ret;
14666 		}
14667 		dst_reg->var_off = tnum_and(dst_reg->var_off, src_reg.var_off);
14668 		scalar32_min_max_and(dst_reg, &src_reg);
14669 		scalar_min_max_and(dst_reg, &src_reg);
14670 		break;
14671 	case BPF_OR:
14672 		if (tnum_is_const(src_reg.var_off)) {
14673 			ret = maybe_fork_scalars(env, insn, dst_reg);
14674 			if (ret)
14675 				return ret;
14676 		}
14677 		dst_reg->var_off = tnum_or(dst_reg->var_off, src_reg.var_off);
14678 		scalar32_min_max_or(dst_reg, &src_reg);
14679 		scalar_min_max_or(dst_reg, &src_reg);
14680 		break;
14681 	case BPF_XOR:
14682 		dst_reg->var_off = tnum_xor(dst_reg->var_off, src_reg.var_off);
14683 		scalar32_min_max_xor(dst_reg, &src_reg);
14684 		scalar_min_max_xor(dst_reg, &src_reg);
14685 		break;
14686 	case BPF_LSH:
14687 		if (alu32)
14688 			scalar32_min_max_lsh(dst_reg, &src_reg);
14689 		else
14690 			scalar_min_max_lsh(dst_reg, &src_reg);
14691 		break;
14692 	case BPF_RSH:
14693 		if (alu32)
14694 			scalar32_min_max_rsh(dst_reg, &src_reg);
14695 		else
14696 			scalar_min_max_rsh(dst_reg, &src_reg);
14697 		break;
14698 	case BPF_ARSH:
14699 		if (alu32)
14700 			scalar32_min_max_arsh(dst_reg, &src_reg);
14701 		else
14702 			scalar_min_max_arsh(dst_reg, &src_reg);
14703 		break;
14704 	case BPF_END:
14705 		scalar_byte_swap(dst_reg, insn);
14706 		break;
14707 	default:
14708 		break;
14709 	}
14710 
14711 	/*
14712 	 * ALU32 ops are zero extended into 64bit register.
14713 	 *
14714 	 * BPF_END is already handled inside the helper (truncation),
14715 	 * so skip zext here to avoid unexpected zero extension.
14716 	 * e.g., le64: opcode=(BPF_END|BPF_ALU|BPF_TO_LE), imm=0x40
14717 	 * This is a 64bit byte swap operation with alu32==true,
14718 	 * but we should not zero extend the result.
14719 	 */
14720 	if (alu32 && opcode != BPF_END)
14721 		zext_32_to_64(dst_reg);
14722 	reg_bounds_sync(dst_reg);
14723 	return 0;
14724 }
14725 
14726 /* Handles ALU ops other than BPF_END, BPF_NEG and BPF_MOV: computes new min/max
14727  * and var_off.
14728  */
14729 static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
14730 				   struct bpf_insn *insn)
14731 {
14732 	struct bpf_verifier_state *vstate = env->cur_state;
14733 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
14734 	struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg;
14735 	struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
14736 	bool alu32 = (BPF_CLASS(insn->code) != BPF_ALU64);
14737 	u8 opcode = BPF_OP(insn->code);
14738 	int err;
14739 
14740 	dst_reg = &regs[insn->dst_reg];
14741 	if (BPF_SRC(insn->code) == BPF_X)
14742 		src_reg = &regs[insn->src_reg];
14743 	else
14744 		src_reg = NULL;
14745 
14746 	/* Case where at least one operand is an arena. */
14747 	if (dst_reg->type == PTR_TO_ARENA || (src_reg && src_reg->type == PTR_TO_ARENA)) {
14748 		struct bpf_insn_aux_data *aux = cur_aux(env);
14749 
14750 		if (dst_reg->type != PTR_TO_ARENA)
14751 			*dst_reg = *src_reg;
14752 
14753 		dst_reg->subreg_def = env->insn_idx + 1;
14754 
14755 		if (BPF_CLASS(insn->code) == BPF_ALU64)
14756 			/*
14757 			 * 32-bit operations zero upper bits automatically.
14758 			 * 64-bit operations need to be converted to 32.
14759 			 */
14760 			aux->needs_zext = true;
14761 
14762 		/* Any arithmetic operations are allowed on arena pointers */
14763 		return 0;
14764 	}
14765 
14766 	if (dst_reg->type != SCALAR_VALUE)
14767 		ptr_reg = dst_reg;
14768 
14769 	if (BPF_SRC(insn->code) == BPF_X) {
14770 		if (src_reg->type != SCALAR_VALUE) {
14771 			if (dst_reg->type != SCALAR_VALUE) {
14772 				/* Combining two pointers by any ALU op yields
14773 				 * an arbitrary scalar. Disallow all math except
14774 				 * pointer subtraction
14775 				 */
14776 				if (opcode == BPF_SUB && env->allow_ptr_leaks) {
14777 					mark_reg_unknown(env, regs, insn->dst_reg);
14778 					return 0;
14779 				}
14780 				verbose(env, "R%d pointer %s pointer prohibited\n",
14781 					insn->dst_reg,
14782 					bpf_alu_string[opcode >> 4]);
14783 				return -EACCES;
14784 			} else {
14785 				/* scalar += pointer
14786 				 * This is legal, but we have to reverse our
14787 				 * src/dest handling in computing the range
14788 				 */
14789 				err = mark_chain_precision(env, insn->dst_reg);
14790 				if (err)
14791 					return err;
14792 				return adjust_ptr_min_max_vals(env, insn,
14793 							       src_reg, dst_reg);
14794 			}
14795 		} else if (ptr_reg) {
14796 			/* pointer += scalar */
14797 			err = mark_chain_precision(env, insn->src_reg);
14798 			if (err)
14799 				return err;
14800 			return adjust_ptr_min_max_vals(env, insn,
14801 						       dst_reg, src_reg);
14802 		} else if (dst_reg->precise) {
14803 			/* if dst_reg is precise, src_reg should be precise as well */
14804 			err = mark_chain_precision(env, insn->src_reg);
14805 			if (err)
14806 				return err;
14807 		}
14808 	} else {
14809 		/* Pretend the src is a reg with a known value, since we only
14810 		 * need to be able to read from this state.
14811 		 */
14812 		off_reg.type = SCALAR_VALUE;
14813 		__mark_reg_known(&off_reg, insn->imm);
14814 		src_reg = &off_reg;
14815 		if (ptr_reg) /* pointer += K */
14816 			return adjust_ptr_min_max_vals(env, insn,
14817 						       ptr_reg, src_reg);
14818 	}
14819 
14820 	/* Got here implies adding two SCALAR_VALUEs */
14821 	if (WARN_ON_ONCE(ptr_reg)) {
14822 		print_verifier_state(env, vstate, vstate->curframe, true);
14823 		verbose(env, "verifier internal error: unexpected ptr_reg\n");
14824 		return -EFAULT;
14825 	}
14826 	if (WARN_ON(!src_reg)) {
14827 		print_verifier_state(env, vstate, vstate->curframe, true);
14828 		verbose(env, "verifier internal error: no src_reg\n");
14829 		return -EFAULT;
14830 	}
14831 	/*
14832 	 * For alu32 linked register tracking, we need to check dst_reg's
14833 	 * umax_value before the ALU operation. After adjust_scalar_min_max_vals(),
14834 	 * alu32 ops will have zero-extended the result, making umax_value <= U32_MAX.
14835 	 */
14836 	u64 dst_umax = reg_umax(dst_reg);
14837 
14838 	err = adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg);
14839 	if (err)
14840 		return err;
14841 	/*
14842 	 * Compilers can generate the code
14843 	 * r1 = r2
14844 	 * r1 += 0x1
14845 	 * if r2 < 1000 goto ...
14846 	 * use r1 in memory access
14847 	 * So remember constant delta between r2 and r1 and update r1 after
14848 	 * 'if' condition.
14849 	 */
14850 	if (env->bpf_capable &&
14851 	    (BPF_OP(insn->code) == BPF_ADD || BPF_OP(insn->code) == BPF_SUB) &&
14852 	    dst_reg->id && is_reg_const(src_reg, alu32) &&
14853 	    !(BPF_SRC(insn->code) == BPF_X && insn->src_reg == insn->dst_reg)) {
14854 		u64 val = reg_const_value(src_reg, alu32);
14855 		s32 off;
14856 
14857 		if (!alu32 && ((s64)val < S32_MIN || (s64)val > S32_MAX))
14858 			goto clear_id;
14859 
14860 		if (alu32 && (dst_umax > U32_MAX))
14861 			goto clear_id;
14862 
14863 		off = (s32)val;
14864 
14865 		if (BPF_OP(insn->code) == BPF_SUB) {
14866 			/* Negating S32_MIN would overflow */
14867 			if (off == S32_MIN)
14868 				goto clear_id;
14869 			off = -off;
14870 		}
14871 
14872 		if (dst_reg->id & BPF_ADD_CONST) {
14873 			/*
14874 			 * If the register already went through rX += val
14875 			 * we cannot accumulate another val into rx->off.
14876 			 */
14877 clear_id:
14878 			clear_scalar_id(dst_reg);
14879 		} else {
14880 			if (alu32)
14881 				dst_reg->id |= BPF_ADD_CONST32;
14882 			else
14883 				dst_reg->id |= BPF_ADD_CONST64;
14884 			dst_reg->delta = off;
14885 		}
14886 	} else {
14887 		/*
14888 		 * Make sure ID is cleared otherwise dst_reg min/max could be
14889 		 * incorrectly propagated into other registers by sync_linked_regs()
14890 		 */
14891 		clear_scalar_id(dst_reg);
14892 	}
14893 	return 0;
14894 }
14895 
14896 /* check validity of 32-bit and 64-bit arithmetic operations */
14897 static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
14898 {
14899 	struct bpf_reg_state *regs = cur_regs(env);
14900 	u8 opcode = BPF_OP(insn->code);
14901 	int err;
14902 
14903 	if (opcode == BPF_END || opcode == BPF_NEG) {
14904 		/* check src operand */
14905 		err = check_reg_arg(env, insn->dst_reg, SRC_OP);
14906 		if (err)
14907 			return err;
14908 
14909 		if (is_pointer_value(env, insn->dst_reg)) {
14910 			verbose(env, "R%d pointer arithmetic prohibited\n",
14911 				insn->dst_reg);
14912 			return -EACCES;
14913 		}
14914 
14915 		/* check dest operand */
14916 		if (regs[insn->dst_reg].type == SCALAR_VALUE) {
14917 			err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
14918 			err = err ?: adjust_scalar_min_max_vals(env, insn,
14919 							 &regs[insn->dst_reg],
14920 							 regs[insn->dst_reg]);
14921 		} else {
14922 			err = check_reg_arg(env, insn->dst_reg, DST_OP);
14923 		}
14924 		if (err)
14925 			return err;
14926 
14927 	} else if (opcode == BPF_MOV) {
14928 
14929 		if (BPF_SRC(insn->code) == BPF_X) {
14930 			if (insn->off == BPF_ADDR_SPACE_CAST) {
14931 				if (!env->prog->aux->arena) {
14932 					verbose(env, "addr_space_cast insn can only be used in a program that has an associated arena\n");
14933 					return -EINVAL;
14934 				}
14935 			}
14936 
14937 			/* check src operand */
14938 			err = check_reg_arg(env, insn->src_reg, SRC_OP);
14939 			if (err)
14940 				return err;
14941 		}
14942 
14943 		/* check dest operand, mark as required later */
14944 		err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
14945 		if (err)
14946 			return err;
14947 
14948 		if (BPF_SRC(insn->code) == BPF_X) {
14949 			struct bpf_reg_state *src_reg = regs + insn->src_reg;
14950 			struct bpf_reg_state *dst_reg = regs + insn->dst_reg;
14951 
14952 			if (BPF_CLASS(insn->code) == BPF_ALU64) {
14953 				if (insn->imm) {
14954 					/* off == BPF_ADDR_SPACE_CAST */
14955 					mark_reg_unknown(env, regs, insn->dst_reg);
14956 					if (insn->imm == 1) { /* cast from as(1) to as(0) */
14957 						dst_reg->type = PTR_TO_ARENA;
14958 						/* PTR_TO_ARENA is 32-bit */
14959 						dst_reg->subreg_def = env->insn_idx + 1;
14960 					}
14961 				} else if (insn->off == 0) {
14962 					/* case: R1 = R2
14963 					 * copy register state to dest reg
14964 					 */
14965 					assign_scalar_id_before_mov(env, src_reg);
14966 					*dst_reg = *src_reg;
14967 					dst_reg->subreg_def = DEF_NOT_SUBREG;
14968 				} else {
14969 					/* case: R1 = (s8, s16 s32)R2 */
14970 					if (is_pointer_value(env, insn->src_reg)) {
14971 						verbose(env,
14972 							"R%d sign-extension part of pointer\n",
14973 							insn->src_reg);
14974 						return -EACCES;
14975 					} else if (src_reg->type == SCALAR_VALUE) {
14976 						bool no_sext;
14977 
14978 						no_sext = reg_umax(src_reg) < (1ULL << (insn->off - 1));
14979 						if (no_sext)
14980 							assign_scalar_id_before_mov(env, src_reg);
14981 						*dst_reg = *src_reg;
14982 						if (!no_sext)
14983 							clear_scalar_id(dst_reg);
14984 						coerce_reg_to_size_sx(dst_reg, insn->off >> 3);
14985 						dst_reg->subreg_def = DEF_NOT_SUBREG;
14986 					} else {
14987 						mark_reg_unknown(env, regs, insn->dst_reg);
14988 					}
14989 				}
14990 			} else {
14991 				/* R1 = (u32) R2 */
14992 				if (is_pointer_value(env, insn->src_reg)) {
14993 					verbose(env,
14994 						"R%d partial copy of pointer\n",
14995 						insn->src_reg);
14996 					return -EACCES;
14997 				} else if (src_reg->type == SCALAR_VALUE) {
14998 					if (insn->off == 0) {
14999 						bool is_src_reg_u32 = get_reg_width(src_reg) <= 32;
15000 
15001 						if (is_src_reg_u32)
15002 							assign_scalar_id_before_mov(env, src_reg);
15003 						*dst_reg = *src_reg;
15004 						/* Make sure ID is cleared if src_reg is not in u32
15005 						 * range otherwise dst_reg min/max could be incorrectly
15006 						 * propagated into src_reg by sync_linked_regs()
15007 						 */
15008 						if (!is_src_reg_u32)
15009 							clear_scalar_id(dst_reg);
15010 						dst_reg->subreg_def = env->insn_idx + 1;
15011 					} else {
15012 						/* case: W1 = (s8, s16)W2 */
15013 						bool no_sext = reg_umax(src_reg) < (1ULL << (insn->off - 1));
15014 
15015 						if (no_sext)
15016 							assign_scalar_id_before_mov(env, src_reg);
15017 						*dst_reg = *src_reg;
15018 						if (!no_sext)
15019 							clear_scalar_id(dst_reg);
15020 						dst_reg->subreg_def = env->insn_idx + 1;
15021 						coerce_subreg_to_size_sx(dst_reg, insn->off >> 3);
15022 					}
15023 				} else {
15024 					mark_reg_unknown(env, regs,
15025 							 insn->dst_reg);
15026 				}
15027 				zext_32_to_64(dst_reg);
15028 				reg_bounds_sync(dst_reg);
15029 			}
15030 		} else {
15031 			/* case: R = imm
15032 			 * remember the value we stored into this reg
15033 			 */
15034 			/* clear any state __mark_reg_known doesn't set */
15035 			mark_reg_unknown(env, regs, insn->dst_reg);
15036 			regs[insn->dst_reg].type = SCALAR_VALUE;
15037 			if (BPF_CLASS(insn->code) == BPF_ALU64) {
15038 				__mark_reg_known(regs + insn->dst_reg,
15039 						 insn->imm);
15040 			} else {
15041 				__mark_reg_known(regs + insn->dst_reg,
15042 						 (u32)insn->imm);
15043 			}
15044 		}
15045 
15046 	} else {	/* all other ALU ops: and, sub, xor, add, ... */
15047 
15048 		if (BPF_SRC(insn->code) == BPF_X) {
15049 			/* check src1 operand */
15050 			err = check_reg_arg(env, insn->src_reg, SRC_OP);
15051 			if (err)
15052 				return err;
15053 		}
15054 
15055 		/* check src2 operand */
15056 		err = check_reg_arg(env, insn->dst_reg, SRC_OP);
15057 		if (err)
15058 			return err;
15059 
15060 		if ((opcode == BPF_MOD || opcode == BPF_DIV) &&
15061 		    BPF_SRC(insn->code) == BPF_K && insn->imm == 0) {
15062 			verbose(env, "div by zero\n");
15063 			return -EINVAL;
15064 		}
15065 
15066 		if ((opcode == BPF_LSH || opcode == BPF_RSH ||
15067 		     opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) {
15068 			int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32;
15069 
15070 			if (insn->imm < 0 || insn->imm >= size) {
15071 				verbose(env, "invalid shift %d\n", insn->imm);
15072 				return -EINVAL;
15073 			}
15074 		}
15075 
15076 		/* check dest operand */
15077 		err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
15078 		err = err ?: adjust_reg_min_max_vals(env, insn);
15079 		if (err)
15080 			return err;
15081 	}
15082 
15083 	return reg_bounds_sanity_check(env, &regs[insn->dst_reg], "alu");
15084 }
15085 
15086 static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
15087 				   struct bpf_reg_state *dst_reg,
15088 				   enum bpf_reg_type type,
15089 				   bool range_right_open)
15090 {
15091 	struct bpf_func_state *state;
15092 	struct bpf_reg_state *reg;
15093 	int new_range;
15094 
15095 	if (reg_umax(dst_reg) == 0 && range_right_open)
15096 		/* This doesn't give us any range */
15097 		return;
15098 
15099 	if (reg_umax(dst_reg) > MAX_PACKET_OFF)
15100 		/* Risk of overflow.  For instance, ptr + (1<<63) may be less
15101 		 * than pkt_end, but that's because it's also less than pkt.
15102 		 */
15103 		return;
15104 
15105 	new_range = reg_umax(dst_reg);
15106 	if (range_right_open)
15107 		new_range++;
15108 
15109 	/* Examples for register markings:
15110 	 *
15111 	 * pkt_data in dst register:
15112 	 *
15113 	 *   r2 = r3;
15114 	 *   r2 += 8;
15115 	 *   if (r2 > pkt_end) goto <handle exception>
15116 	 *   <access okay>
15117 	 *
15118 	 *   r2 = r3;
15119 	 *   r2 += 8;
15120 	 *   if (r2 < pkt_end) goto <access okay>
15121 	 *   <handle exception>
15122 	 *
15123 	 *   Where:
15124 	 *     r2 == dst_reg, pkt_end == src_reg
15125 	 *     r2=pkt(id=n,off=8,r=0)
15126 	 *     r3=pkt(id=n,off=0,r=0)
15127 	 *
15128 	 * pkt_data in src register:
15129 	 *
15130 	 *   r2 = r3;
15131 	 *   r2 += 8;
15132 	 *   if (pkt_end >= r2) goto <access okay>
15133 	 *   <handle exception>
15134 	 *
15135 	 *   r2 = r3;
15136 	 *   r2 += 8;
15137 	 *   if (pkt_end <= r2) goto <handle exception>
15138 	 *   <access okay>
15139 	 *
15140 	 *   Where:
15141 	 *     pkt_end == dst_reg, r2 == src_reg
15142 	 *     r2=pkt(id=n,off=8,r=0)
15143 	 *     r3=pkt(id=n,off=0,r=0)
15144 	 *
15145 	 * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
15146 	 * or r3=pkt(id=n,off=0,r=8-1), so that range of bytes [r3, r3 + 8)
15147 	 * and [r3, r3 + 8-1) respectively is safe to access depending on
15148 	 * the check.
15149 	 */
15150 
15151 	/* If our ids match, then we must have the same max_value.  And we
15152 	 * don't care about the other reg's fixed offset, since if it's too big
15153 	 * the range won't allow anything.
15154 	 * reg_umax(dst_reg) is known < MAX_PACKET_OFF, therefore it fits in a u16.
15155 	 */
15156 	bpf_for_each_reg_in_vstate(vstate, state, reg, ({
15157 		if (reg->type == type && reg->id == dst_reg->id)
15158 			/* keep the maximum range already checked */
15159 			reg->range = max(reg->range, new_range);
15160 	}));
15161 }
15162 
15163 static void regs_refine_cond_op(struct bpf_reg_state *reg1, struct bpf_reg_state *reg2,
15164 				u8 opcode, bool is_jmp32);
15165 static u8 rev_opcode(u8 opcode);
15166 
15167 /*
15168  * Learn more information about live branches by simulating refinement on both branches.
15169  * regs_refine_cond_op() is sound, so producing ill-formed register bounds for the branch means
15170  * that branch is dead.
15171  */
15172 static int simulate_both_branches_taken(struct bpf_verifier_env *env, u8 opcode, bool is_jmp32)
15173 {
15174 	/* Fallthrough (FALSE) branch */
15175 	regs_refine_cond_op(&env->false_reg1, &env->false_reg2, rev_opcode(opcode), is_jmp32);
15176 	reg_bounds_sync(&env->false_reg1);
15177 	reg_bounds_sync(&env->false_reg2);
15178 	/*
15179 	 * If there is a range bounds violation in *any* of the abstract values in either
15180 	 * reg_states in the FALSE branch (i.e. reg1, reg2), the FALSE branch must be dead. Only
15181 	 * TRUE branch will be taken.
15182 	 */
15183 	if (range_bounds_violation(&env->false_reg1) || range_bounds_violation(&env->false_reg2))
15184 		return 1;
15185 
15186 	/* Jump (TRUE) branch */
15187 	regs_refine_cond_op(&env->true_reg1, &env->true_reg2, opcode, is_jmp32);
15188 	reg_bounds_sync(&env->true_reg1);
15189 	reg_bounds_sync(&env->true_reg2);
15190 	/*
15191 	 * If there is a range bounds violation in *any* of the abstract values in either
15192 	 * reg_states in the TRUE branch (i.e. true_reg1, true_reg2), the TRUE branch must be dead.
15193 	 * Only FALSE branch will be taken.
15194 	 */
15195 	if (range_bounds_violation(&env->true_reg1) || range_bounds_violation(&env->true_reg2))
15196 		return 0;
15197 
15198 	/* Both branches are possible, we can't determine which one will be taken. */
15199 	return -1;
15200 }
15201 
15202 /*
15203  * <reg1> <op> <reg2>, currently assuming reg2 is a constant
15204  */
15205 static int is_scalar_branch_taken(struct bpf_verifier_env *env, struct bpf_reg_state *reg1,
15206 				  struct bpf_reg_state *reg2, u8 opcode, bool is_jmp32)
15207 {
15208 	struct tnum t1 = is_jmp32 ? tnum_subreg(reg1->var_off) : reg1->var_off;
15209 	struct tnum t2 = is_jmp32 ? tnum_subreg(reg2->var_off) : reg2->var_off;
15210 	u64 umin1 = is_jmp32 ? (u64)reg_u32_min(reg1) : reg_umin(reg1);
15211 	u64 umax1 = is_jmp32 ? (u64)reg_u32_max(reg1) : reg_umax(reg1);
15212 	s64 smin1 = is_jmp32 ? (s64)reg_s32_min(reg1) : reg_smin(reg1);
15213 	s64 smax1 = is_jmp32 ? (s64)reg_s32_max(reg1) : reg_smax(reg1);
15214 	u64 umin2 = is_jmp32 ? (u64)reg_u32_min(reg2) : reg_umin(reg2);
15215 	u64 umax2 = is_jmp32 ? (u64)reg_u32_max(reg2) : reg_umax(reg2);
15216 	s64 smin2 = is_jmp32 ? (s64)reg_s32_min(reg2) : reg_smin(reg2);
15217 	s64 smax2 = is_jmp32 ? (s64)reg_s32_max(reg2) : reg_smax(reg2);
15218 
15219 	if (reg1 == reg2) {
15220 		switch (opcode) {
15221 		case BPF_JGE:
15222 		case BPF_JLE:
15223 		case BPF_JSGE:
15224 		case BPF_JSLE:
15225 		case BPF_JEQ:
15226 			return 1;
15227 		case BPF_JGT:
15228 		case BPF_JLT:
15229 		case BPF_JSGT:
15230 		case BPF_JSLT:
15231 		case BPF_JNE:
15232 			return 0;
15233 		case BPF_JSET:
15234 			if (tnum_is_const(t1))
15235 				return t1.value != 0;
15236 			else
15237 				return (smin1 <= 0 && smax1 >= 0) ? -1 : 1;
15238 		default:
15239 			return -1;
15240 		}
15241 	}
15242 
15243 	switch (opcode) {
15244 	case BPF_JEQ:
15245 		/* constants, umin/umax and smin/smax checks would be
15246 		 * redundant in this case because they all should match
15247 		 */
15248 		if (tnum_is_const(t1) && tnum_is_const(t2))
15249 			return t1.value == t2.value;
15250 		if (!tnum_overlap(t1, t2))
15251 			return 0;
15252 		/* non-overlapping ranges */
15253 		if (umin1 > umax2 || umax1 < umin2)
15254 			return 0;
15255 		if (smin1 > smax2 || smax1 < smin2)
15256 			return 0;
15257 		if (!is_jmp32) {
15258 			/* if 64-bit ranges are inconclusive, see if we can
15259 			 * utilize 32-bit subrange knowledge to eliminate
15260 			 * branches that can't be taken a priori
15261 			 */
15262 			if (reg_u32_min(reg1) > reg_u32_max(reg2) ||
15263 			    reg_u32_max(reg1) < reg_u32_min(reg2))
15264 				return 0;
15265 			if (reg_s32_min(reg1) > reg_s32_max(reg2) ||
15266 			    reg_s32_max(reg1) < reg_s32_min(reg2))
15267 				return 0;
15268 		}
15269 		break;
15270 	case BPF_JNE:
15271 		/* constants, umin/umax and smin/smax checks would be
15272 		 * redundant in this case because they all should match
15273 		 */
15274 		if (tnum_is_const(t1) && tnum_is_const(t2))
15275 			return t1.value != t2.value;
15276 		if (!tnum_overlap(t1, t2))
15277 			return 1;
15278 		/* non-overlapping ranges */
15279 		if (umin1 > umax2 || umax1 < umin2)
15280 			return 1;
15281 		if (smin1 > smax2 || smax1 < smin2)
15282 			return 1;
15283 		if (!is_jmp32) {
15284 			/* if 64-bit ranges are inconclusive, see if we can
15285 			 * utilize 32-bit subrange knowledge to eliminate
15286 			 * branches that can't be taken a priori
15287 			 */
15288 			if (reg_u32_min(reg1) > reg_u32_max(reg2) ||
15289 			    reg_u32_max(reg1) < reg_u32_min(reg2))
15290 				return 1;
15291 			if (reg_s32_min(reg1) > reg_s32_max(reg2) ||
15292 			    reg_s32_max(reg1) < reg_s32_min(reg2))
15293 				return 1;
15294 		}
15295 		break;
15296 	case BPF_JSET:
15297 		if (!is_reg_const(reg2, is_jmp32)) {
15298 			swap(reg1, reg2);
15299 			swap(t1, t2);
15300 		}
15301 		if (!is_reg_const(reg2, is_jmp32))
15302 			return -1;
15303 		if ((~t1.mask & t1.value) & t2.value)
15304 			return 1;
15305 		if (!((t1.mask | t1.value) & t2.value))
15306 			return 0;
15307 		break;
15308 	case BPF_JGT:
15309 		if (umin1 > umax2)
15310 			return 1;
15311 		else if (umax1 <= umin2)
15312 			return 0;
15313 		break;
15314 	case BPF_JSGT:
15315 		if (smin1 > smax2)
15316 			return 1;
15317 		else if (smax1 <= smin2)
15318 			return 0;
15319 		break;
15320 	case BPF_JLT:
15321 		if (umax1 < umin2)
15322 			return 1;
15323 		else if (umin1 >= umax2)
15324 			return 0;
15325 		break;
15326 	case BPF_JSLT:
15327 		if (smax1 < smin2)
15328 			return 1;
15329 		else if (smin1 >= smax2)
15330 			return 0;
15331 		break;
15332 	case BPF_JGE:
15333 		if (umin1 >= umax2)
15334 			return 1;
15335 		else if (umax1 < umin2)
15336 			return 0;
15337 		break;
15338 	case BPF_JSGE:
15339 		if (smin1 >= smax2)
15340 			return 1;
15341 		else if (smax1 < smin2)
15342 			return 0;
15343 		break;
15344 	case BPF_JLE:
15345 		if (umax1 <= umin2)
15346 			return 1;
15347 		else if (umin1 > umax2)
15348 			return 0;
15349 		break;
15350 	case BPF_JSLE:
15351 		if (smax1 <= smin2)
15352 			return 1;
15353 		else if (smin1 > smax2)
15354 			return 0;
15355 		break;
15356 	}
15357 
15358 	return simulate_both_branches_taken(env, opcode, is_jmp32);
15359 }
15360 
15361 static int flip_opcode(u32 opcode)
15362 {
15363 	/* How can we transform "a <op> b" into "b <op> a"? */
15364 	static const u8 opcode_flip[16] = {
15365 		/* these stay the same */
15366 		[BPF_JEQ  >> 4] = BPF_JEQ,
15367 		[BPF_JNE  >> 4] = BPF_JNE,
15368 		[BPF_JSET >> 4] = BPF_JSET,
15369 		/* these swap "lesser" and "greater" (L and G in the opcodes) */
15370 		[BPF_JGE  >> 4] = BPF_JLE,
15371 		[BPF_JGT  >> 4] = BPF_JLT,
15372 		[BPF_JLE  >> 4] = BPF_JGE,
15373 		[BPF_JLT  >> 4] = BPF_JGT,
15374 		[BPF_JSGE >> 4] = BPF_JSLE,
15375 		[BPF_JSGT >> 4] = BPF_JSLT,
15376 		[BPF_JSLE >> 4] = BPF_JSGE,
15377 		[BPF_JSLT >> 4] = BPF_JSGT
15378 	};
15379 	return opcode_flip[opcode >> 4];
15380 }
15381 
15382 static int is_pkt_ptr_branch_taken(struct bpf_reg_state *dst_reg,
15383 				   struct bpf_reg_state *src_reg,
15384 				   u8 opcode)
15385 {
15386 	struct bpf_reg_state *pkt;
15387 
15388 	if (src_reg->type == PTR_TO_PACKET_END) {
15389 		pkt = dst_reg;
15390 	} else if (dst_reg->type == PTR_TO_PACKET_END) {
15391 		pkt = src_reg;
15392 		opcode = flip_opcode(opcode);
15393 	} else {
15394 		return -1;
15395 	}
15396 
15397 	if (pkt->range >= 0)
15398 		return -1;
15399 
15400 	switch (opcode) {
15401 	case BPF_JLE:
15402 		/* pkt <= pkt_end */
15403 		fallthrough;
15404 	case BPF_JGT:
15405 		/* pkt > pkt_end */
15406 		if (pkt->range == BEYOND_PKT_END)
15407 			/* pkt has at last one extra byte beyond pkt_end */
15408 			return opcode == BPF_JGT;
15409 		break;
15410 	case BPF_JLT:
15411 		/* pkt < pkt_end */
15412 		fallthrough;
15413 	case BPF_JGE:
15414 		/* pkt >= pkt_end */
15415 		if (pkt->range == BEYOND_PKT_END || pkt->range == AT_PKT_END)
15416 			return opcode == BPF_JGE;
15417 		break;
15418 	}
15419 	return -1;
15420 }
15421 
15422 /* compute branch direction of the expression "if (<reg1> opcode <reg2>) goto target;"
15423  * and return:
15424  *  1 - branch will be taken and "goto target" will be executed
15425  *  0 - branch will not be taken and fall-through to next insn
15426  * -1 - unknown. Example: "if (reg1 < 5)" is unknown when register value
15427  *      range [0,10]
15428  */
15429 static int is_branch_taken(struct bpf_verifier_env *env, struct bpf_reg_state *reg1,
15430 			   struct bpf_reg_state *reg2, u8 opcode, bool is_jmp32)
15431 {
15432 	if (reg_is_pkt_pointer_any(reg1) && reg_is_pkt_pointer_any(reg2) && !is_jmp32)
15433 		return is_pkt_ptr_branch_taken(reg1, reg2, opcode);
15434 
15435 	if (__is_pointer_value(false, reg1) || __is_pointer_value(false, reg2)) {
15436 		u64 val;
15437 
15438 		/* arrange that reg2 is a scalar, and reg1 is a pointer */
15439 		if (!is_reg_const(reg2, is_jmp32)) {
15440 			opcode = flip_opcode(opcode);
15441 			swap(reg1, reg2);
15442 		}
15443 		/* and ensure that reg2 is a constant */
15444 		if (!is_reg_const(reg2, is_jmp32))
15445 			return -1;
15446 
15447 		if (!reg_not_null(env, reg1))
15448 			return -1;
15449 
15450 		/* If pointer is valid tests against zero will fail so we can
15451 		 * use this to direct branch taken.
15452 		 */
15453 		val = reg_const_value(reg2, is_jmp32);
15454 		if (val != 0)
15455 			return -1;
15456 
15457 		switch (opcode) {
15458 		case BPF_JEQ:
15459 			return 0;
15460 		case BPF_JNE:
15461 			return 1;
15462 		default:
15463 			return -1;
15464 		}
15465 	}
15466 
15467 	/* now deal with two scalars, but not necessarily constants */
15468 	return is_scalar_branch_taken(env, reg1, reg2, opcode, is_jmp32);
15469 }
15470 
15471 /* Opcode that corresponds to a *false* branch condition.
15472  * E.g., if r1 < r2, then reverse (false) condition is r1 >= r2
15473  */
15474 static u8 rev_opcode(u8 opcode)
15475 {
15476 	switch (opcode) {
15477 	case BPF_JEQ:		return BPF_JNE;
15478 	case BPF_JNE:		return BPF_JEQ;
15479 	/* JSET doesn't have it's reverse opcode in BPF, so add
15480 	 * BPF_X flag to denote the reverse of that operation
15481 	 */
15482 	case BPF_JSET:		return BPF_JSET | BPF_X;
15483 	case BPF_JSET | BPF_X:	return BPF_JSET;
15484 	case BPF_JGE:		return BPF_JLT;
15485 	case BPF_JGT:		return BPF_JLE;
15486 	case BPF_JLE:		return BPF_JGT;
15487 	case BPF_JLT:		return BPF_JGE;
15488 	case BPF_JSGE:		return BPF_JSLT;
15489 	case BPF_JSGT:		return BPF_JSLE;
15490 	case BPF_JSLE:		return BPF_JSGT;
15491 	case BPF_JSLT:		return BPF_JSGE;
15492 	default:		return 0;
15493 	}
15494 }
15495 
/* Refine range knowledge for <reg1> <op> <reg2> conditional operation.
 *
 * Narrows the tracked state of @reg1 and/or @reg2 in place, assuming that
 * "<reg1> <opcode> <reg2>" holds. When @is_jmp32 is set the comparison is on
 * the 32-bit subregisters and the refinements target r32 and the subreg part
 * of var_off; otherwise they target r64 and the whole var_off.
 *
 * @opcode may also be BPF_JSET | BPF_X, the synthetic "reverse of BPF_JSET"
 * produced by rev_opcode(). Opcodes not handled below leave both registers
 * untouched.
 */
static void regs_refine_cond_op(struct bpf_reg_state *reg1, struct bpf_reg_state *reg2,
				u8 opcode, bool is_jmp32)
{
	struct tnum t;
	u64 val;

	/* In case of GE/GT/SGE/SGT, reuse LE/LT/SLE/SLT logic from below.
	 * swap() only exchanges the local pointers, so the refinements still
	 * land in the caller's registers, just with the operand roles flipped.
	 */
	switch (opcode) {
	case BPF_JGE:
	case BPF_JGT:
	case BPF_JSGE:
	case BPF_JSGT:
		opcode = flip_opcode(opcode);
		swap(reg1, reg2);
		break;
	default:
		break;
	}

	switch (opcode) {
	case BPF_JEQ:
		/* Equal values: both registers end up with the intersection of
		 * their ranges and of their tnums.
		 */
		if (is_jmp32) {
			reg1->r32 = cnum32_intersect(reg1->r32, reg2->r32);
			reg2->r32 = reg1->r32;

			t = tnum_intersect(tnum_subreg(reg1->var_off), tnum_subreg(reg2->var_off));
			reg1->var_off = tnum_with_subreg(reg1->var_off, t);
			reg2->var_off = tnum_with_subreg(reg2->var_off, t);
		} else {
			reg1->r64 = cnum64_intersect(reg1->r64, reg2->r64);
			reg2->r64 = reg1->r64;

			reg1->var_off = tnum_intersect(reg1->var_off, reg2->var_off);
			reg2->var_off = reg1->var_off;
		}
		break;
	case BPF_JNE:
		/* Make reg2 the constant operand if either one is constant;
		 * nothing can be learned when neither is.
		 */
		if (!is_reg_const(reg2, is_jmp32))
			swap(reg1, reg2);
		if (!is_reg_const(reg2, is_jmp32))
			break;

		/* reg2 is a constant here: exclude that single value from the
		 * range of reg1.
		 * NOTE(review): the compound literals below presumably follow a
		 * {start, length} layout, i.e. the wrapping range that begins at
		 * val + 1 and covers every value except val -- confirm against
		 * the cnum32/cnum64 definitions.
		 */
		val = reg_const_value(reg2, is_jmp32);
		if (is_jmp32) {
			/* Complement of the range [val, val] as cnum32. */
			cnum32_intersect_with(&reg1->r32, (struct cnum32){ val + 1, U32_MAX - 1 });
		} else {
			/* Complement of the range [val, val] as cnum64. */
			cnum64_intersect_with(&reg1->r64, (struct cnum64){ val + 1, U64_MAX - 1 });
		}
		break;
	case BPF_JSET:
		/* As for BPF_JNE: reg2 must be (or become, via swap) a constant. */
		if (!is_reg_const(reg2, is_jmp32))
			swap(reg1, reg2);
		if (!is_reg_const(reg2, is_jmp32))
			break;
		val = reg_const_value(reg2, is_jmp32);
		/* BPF_JSET (i.e., TRUE branch, *not* BPF_JSET | BPF_X)
		 * requires single bit to learn something useful. E.g., if we
		 * know that `r1 & 0x3` is true, then which bits (0, 1, or both)
		 * are actually set? We can learn something definite only if
		 * it's a single-bit value to begin with.
		 *
		 * BPF_JSET | BPF_X (i.e., negation of BPF_JSET) doesn't have
		 * this restriction. I.e., !(r1 & 0x3) means neither bit 0 nor
		 * bit 1 is set, which we can readily use in adjustments.
		 */
		if (!is_power_of_2(val))
			break;
		/* The tested bit is set: OR it into the tnum of reg1. */
		if (is_jmp32) {
			t = tnum_or(tnum_subreg(reg1->var_off), tnum_const(val));
			reg1->var_off = tnum_with_subreg(reg1->var_off, t);
		} else {
			reg1->var_off = tnum_or(reg1->var_off, tnum_const(val));
		}
		break;
	case BPF_JSET | BPF_X: /* reverse of BPF_JSET, see rev_opcode() */
		if (!is_reg_const(reg2, is_jmp32))
			swap(reg1, reg2);
		if (!is_reg_const(reg2, is_jmp32))
			break;
		val = reg_const_value(reg2, is_jmp32);
		/* Forget the ranges before narrowing tnums, to avoid invariant
		 * violations if we're on a dead branch.
		 */
		__mark_reg_unbounded(reg1);
		/* None of the bits in val is set: mask them out of the tnum. */
		if (is_jmp32) {
			t = tnum_and(tnum_subreg(reg1->var_off), tnum_const(~val));
			reg1->var_off = tnum_with_subreg(reg1->var_off, t);
		} else {
			reg1->var_off = tnum_and(reg1->var_off, tnum_const(~val));
		}
		break;
	/* For the four ordering cases below reg1 is narrowed first and reg2 is
	 * then narrowed using the already-updated minimum of reg1, so the order
	 * of the two statements in each branch matters.
	 */
	case BPF_JLE:
		/* reg1 <= reg2 (unsigned): cap reg1 by max(reg2), raise reg2 to min(reg1). */
		if (is_jmp32) {
			cnum32_intersect_with_urange(&reg1->r32, 0, reg_u32_max(reg2));
			cnum32_intersect_with_urange(&reg2->r32, reg_u32_min(reg1), U32_MAX);
		} else {
			cnum64_intersect_with_urange(&reg1->r64, 0, reg_umax(reg2));
			cnum64_intersect_with_urange(&reg2->r64, reg_umin(reg1), U64_MAX);
		}
		break;
	case BPF_JLT:
		/* reg1 < reg2 (unsigned): same as BPF_JLE with the bounds moved by one. */
		if (is_jmp32) {
			cnum32_intersect_with_urange(&reg1->r32, 0, reg_u32_max(reg2) - 1);
			cnum32_intersect_with_urange(&reg2->r32, reg_u32_min(reg1) + 1, U32_MAX);
		} else {
			cnum64_intersect_with_urange(&reg1->r64, 0, reg_umax(reg2) - 1);
			cnum64_intersect_with_urange(&reg2->r64, reg_umin(reg1) + 1, U64_MAX);
		}
		break;
	case BPF_JSLE:
		/* reg1 <= reg2 (signed). */
		if (is_jmp32) {
			cnum32_intersect_with_srange(&reg1->r32, S32_MIN, reg_s32_max(reg2));
			cnum32_intersect_with_srange(&reg2->r32, reg_s32_min(reg1), S32_MAX);
		} else {
			cnum64_intersect_with_srange(&reg1->r64, S64_MIN, reg_smax(reg2));
			cnum64_intersect_with_srange(&reg2->r64, reg_smin(reg1), S64_MAX);
		}
		break;
	case BPF_JSLT:
		/* reg1 < reg2 (signed): same as BPF_JSLE with the bounds moved by one. */
		if (is_jmp32) {
			cnum32_intersect_with_srange(&reg1->r32, S32_MIN, reg_s32_max(reg2) - 1);
			cnum32_intersect_with_srange(&reg2->r32, reg_s32_min(reg1) + 1, S32_MAX);
		} else {
			cnum64_intersect_with_srange(&reg1->r64, S64_MIN, reg_smax(reg2) - 1);
			cnum64_intersect_with_srange(&reg2->r64, reg_smin(reg1) + 1, S64_MAX);
		}
		break;
	default:
		return;
	}
}
15633 
15634 /* Check for invariant violations on the registers for both branches of a condition */
15635 static int regs_bounds_sanity_check_branches(struct bpf_verifier_env *env)
15636 {
15637 	int err;
15638 
15639 	err = reg_bounds_sanity_check(env, &env->true_reg1, "true_reg1");
15640 	err = err ?: reg_bounds_sanity_check(env, &env->true_reg2, "true_reg2");
15641 	err = err ?: reg_bounds_sanity_check(env, &env->false_reg1, "false_reg1");
15642 	err = err ?: reg_bounds_sanity_check(env, &env->false_reg2, "false_reg2");
15643 	return err;
15644 }
15645 
15646 static void mark_ptr_or_null_reg(struct bpf_func_state *state,
15647 				 struct bpf_reg_state *reg, u32 id,
15648 				 bool is_null)
15649 {
15650 	if (type_may_be_null(reg->type) && reg->id == id &&
15651 	    (is_rcu_reg(reg) || !WARN_ON_ONCE(!reg->id))) {
15652 		/* Old offset should have been known-zero, because we don't
15653 		 * allow pointer arithmetic on pointers that might be NULL.
15654 		 * If we see this happening, don't convert the register.
15655 		 *
15656 		 * But in some cases, some helpers that return local kptrs
15657 		 * advance offset for the returned pointer. In those cases,
15658 		 * it is fine to expect to see reg->var_off.
15659 		 */
15660 		if (!(type_is_ptr_alloc_obj(reg->type) || type_is_non_owning_ref(reg->type)) &&
15661 		    WARN_ON_ONCE(!tnum_equals_const(reg->var_off, 0)))
15662 			return;
15663 		if (is_null) {
15664 			/* We don't need id from this point
15665 			 * onwards anymore, thus we should better reset it,
15666 			 * so that state pruning has chances to take effect.
15667 			 */
15668 			__mark_reg_known_zero(reg);
15669 			reg->type = SCALAR_VALUE;
15670 
15671 			return;
15672 		}
15673 
15674 		mark_ptr_not_null_reg(reg);
15675 
15676 		/*
15677 		 * reg->id is preserved for object relationship tracking
15678 		 * and spin_lock lock state tracking
15679 		 */
15680 	}
15681 }
15682 
15683 /* The logic is similar to find_good_pkt_pointers(), both could eventually
15684  * be folded together at some point.
15685  */
15686 static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno,
15687 				  bool is_null)
15688 {
15689 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
15690 	struct bpf_reg_state *regs = state->regs, *reg;
15691 	u32 id = regs[regno].id;
15692 
15693 	if (is_null && find_reference_state(vstate, id))
15694 		/* regs[regno] is in the " == NULL" branch.
15695 		 * No one could have freed the reference state before
15696 		 * doing the NULL check.
15697 		 */
15698 		WARN_ON_ONCE(release_reference_nomark(vstate, id));
15699 
15700 	bpf_for_each_reg_in_vstate(vstate, state, reg, ({
15701 		mark_ptr_or_null_reg(state, reg, id, is_null);
15702 	}));
15703 }
15704 
15705 static bool try_match_pkt_pointers(const struct bpf_insn *insn,
15706 				   struct bpf_reg_state *dst_reg,
15707 				   struct bpf_reg_state *src_reg,
15708 				   struct bpf_verifier_state *this_branch,
15709 				   struct bpf_verifier_state *other_branch)
15710 {
15711 	if (BPF_SRC(insn->code) != BPF_X)
15712 		return false;
15713 
15714 	/* Pointers are always 64-bit. */
15715 	if (BPF_CLASS(insn->code) == BPF_JMP32)
15716 		return false;
15717 
15718 	switch (BPF_OP(insn->code)) {
15719 	case BPF_JGT:
15720 		if ((dst_reg->type == PTR_TO_PACKET &&
15721 		     src_reg->type == PTR_TO_PACKET_END) ||
15722 		    (dst_reg->type == PTR_TO_PACKET_META &&
15723 		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
15724 			/* pkt_data' > pkt_end, pkt_meta' > pkt_data */
15725 			find_good_pkt_pointers(this_branch, dst_reg,
15726 					       dst_reg->type, false);
15727 			mark_pkt_end(other_branch, insn->dst_reg, true);
15728 		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
15729 			    src_reg->type == PTR_TO_PACKET) ||
15730 			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
15731 			    src_reg->type == PTR_TO_PACKET_META)) {
15732 			/* pkt_end > pkt_data', pkt_data > pkt_meta' */
15733 			find_good_pkt_pointers(other_branch, src_reg,
15734 					       src_reg->type, true);
15735 			mark_pkt_end(this_branch, insn->src_reg, false);
15736 		} else {
15737 			return false;
15738 		}
15739 		break;
15740 	case BPF_JLT:
15741 		if ((dst_reg->type == PTR_TO_PACKET &&
15742 		     src_reg->type == PTR_TO_PACKET_END) ||
15743 		    (dst_reg->type == PTR_TO_PACKET_META &&
15744 		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
15745 			/* pkt_data' < pkt_end, pkt_meta' < pkt_data */
15746 			find_good_pkt_pointers(other_branch, dst_reg,
15747 					       dst_reg->type, true);
15748 			mark_pkt_end(this_branch, insn->dst_reg, false);
15749 		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
15750 			    src_reg->type == PTR_TO_PACKET) ||
15751 			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
15752 			    src_reg->type == PTR_TO_PACKET_META)) {
15753 			/* pkt_end < pkt_data', pkt_data > pkt_meta' */
15754 			find_good_pkt_pointers(this_branch, src_reg,
15755 					       src_reg->type, false);
15756 			mark_pkt_end(other_branch, insn->src_reg, true);
15757 		} else {
15758 			return false;
15759 		}
15760 		break;
15761 	case BPF_JGE:
15762 		if ((dst_reg->type == PTR_TO_PACKET &&
15763 		     src_reg->type == PTR_TO_PACKET_END) ||
15764 		    (dst_reg->type == PTR_TO_PACKET_META &&
15765 		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
15766 			/* pkt_data' >= pkt_end, pkt_meta' >= pkt_data */
15767 			find_good_pkt_pointers(this_branch, dst_reg,
15768 					       dst_reg->type, true);
15769 			mark_pkt_end(other_branch, insn->dst_reg, false);
15770 		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
15771 			    src_reg->type == PTR_TO_PACKET) ||
15772 			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
15773 			    src_reg->type == PTR_TO_PACKET_META)) {
15774 			/* pkt_end >= pkt_data', pkt_data >= pkt_meta' */
15775 			find_good_pkt_pointers(other_branch, src_reg,
15776 					       src_reg->type, false);
15777 			mark_pkt_end(this_branch, insn->src_reg, true);
15778 		} else {
15779 			return false;
15780 		}
15781 		break;
15782 	case BPF_JLE:
15783 		if ((dst_reg->type == PTR_TO_PACKET &&
15784 		     src_reg->type == PTR_TO_PACKET_END) ||
15785 		    (dst_reg->type == PTR_TO_PACKET_META &&
15786 		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
15787 			/* pkt_data' <= pkt_end, pkt_meta' <= pkt_data */
15788 			find_good_pkt_pointers(other_branch, dst_reg,
15789 					       dst_reg->type, false);
15790 			mark_pkt_end(this_branch, insn->dst_reg, true);
15791 		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
15792 			    src_reg->type == PTR_TO_PACKET) ||
15793 			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
15794 			    src_reg->type == PTR_TO_PACKET_META)) {
15795 			/* pkt_end <= pkt_data', pkt_data <= pkt_meta' */
15796 			find_good_pkt_pointers(this_branch, src_reg,
15797 					       src_reg->type, true);
15798 			mark_pkt_end(other_branch, insn->src_reg, false);
15799 		} else {
15800 			return false;
15801 		}
15802 		break;
15803 	default:
15804 		return false;
15805 	}
15806 
15807 	return true;
15808 }
15809 
15810 static void __collect_linked_regs(struct linked_regs *reg_set, struct bpf_reg_state *reg,
15811 				  u32 id, u32 frameno, u32 spi_or_reg, bool is_reg)
15812 {
15813 	struct linked_reg *e;
15814 
15815 	if (reg->type != SCALAR_VALUE || (reg->id & ~BPF_ADD_CONST) != id)
15816 		return;
15817 
15818 	e = linked_regs_push(reg_set);
15819 	if (e) {
15820 		e->frameno = frameno;
15821 		e->is_reg = is_reg;
15822 		e->regno = spi_or_reg;
15823 	} else {
15824 		clear_scalar_id(reg);
15825 	}
15826 }
15827 
15828 /* For all R being scalar registers or spilled scalar registers
15829  * in verifier state, save R in linked_regs if R->id == id.
15830  * If there are too many Rs sharing same id, reset id for leftover Rs.
15831  */
15832 static void collect_linked_regs(struct bpf_verifier_env *env,
15833 				struct bpf_verifier_state *vstate,
15834 				u32 id,
15835 				struct linked_regs *linked_regs)
15836 {
15837 	struct bpf_insn_aux_data *aux = env->insn_aux_data;
15838 	struct bpf_func_state *func;
15839 	struct bpf_reg_state *reg;
15840 	u16 live_regs;
15841 	int i, j;
15842 
15843 	id = id & ~BPF_ADD_CONST;
15844 	for (i = vstate->curframe; i >= 0; i--) {
15845 		live_regs = aux[bpf_frame_insn_idx(vstate, i)].live_regs_before;
15846 		func = vstate->frame[i];
15847 		for (j = 0; j < BPF_REG_FP; j++) {
15848 			if (!(live_regs & BIT(j)))
15849 				continue;
15850 			reg = &func->regs[j];
15851 			__collect_linked_regs(linked_regs, reg, id, i, j, true);
15852 		}
15853 		for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
15854 			if (!bpf_is_spilled_reg(&func->stack[j]))
15855 				continue;
15856 			reg = &func->stack[j].spilled_ptr;
15857 			__collect_linked_regs(linked_regs, reg, id, i, j, false);
15858 		}
15859 	}
15860 }
15861 
15862 /* For all R in linked_regs, copy known_reg range into R
15863  * if R->id == known_reg->id.
15864  */
15865 static void sync_linked_regs(struct bpf_verifier_env *env, struct bpf_verifier_state *vstate,
15866 			     struct bpf_reg_state *known_reg, struct linked_regs *linked_regs)
15867 {
15868 	struct bpf_reg_state fake_reg;
15869 	struct bpf_reg_state *reg;
15870 	struct linked_reg *e;
15871 	int i;
15872 
15873 	for (i = 0; i < linked_regs->cnt; ++i) {
15874 		e = &linked_regs->entries[i];
15875 		reg = e->is_reg ? &vstate->frame[e->frameno]->regs[e->regno]
15876 				: &vstate->frame[e->frameno]->stack[e->spi].spilled_ptr;
15877 		if (reg->type != SCALAR_VALUE || reg == known_reg)
15878 			continue;
15879 		if ((reg->id & ~BPF_ADD_CONST) != (known_reg->id & ~BPF_ADD_CONST))
15880 			continue;
15881 		/*
15882 		 * Skip mixed 32/64-bit links: the delta relationship doesn't
15883 		 * hold across different ALU widths.
15884 		 */
15885 		if (((reg->id ^ known_reg->id) & BPF_ADD_CONST) == BPF_ADD_CONST)
15886 			continue;
15887 		if ((!(reg->id & BPF_ADD_CONST) && !(known_reg->id & BPF_ADD_CONST)) ||
15888 		    reg->delta == known_reg->delta) {
15889 			s32 saved_subreg_def = reg->subreg_def;
15890 
15891 			*reg = *known_reg;
15892 			reg->subreg_def = saved_subreg_def;
15893 		} else {
15894 			s32 saved_subreg_def = reg->subreg_def;
15895 			s32 saved_off = reg->delta;
15896 			u32 saved_id = reg->id;
15897 
15898 			fake_reg.type = SCALAR_VALUE;
15899 			__mark_reg_known(&fake_reg, (s64)reg->delta - (s64)known_reg->delta);
15900 
15901 			/* reg = known_reg; reg += delta */
15902 			*reg = *known_reg;
15903 			/*
15904 			 * Must preserve off, id and subreg_def flag,
15905 			 * otherwise another sync_linked_regs() will be incorrect.
15906 			 */
15907 			reg->delta = saved_off;
15908 			reg->id = saved_id;
15909 			reg->subreg_def = saved_subreg_def;
15910 
15911 			scalar32_min_max_add(reg, &fake_reg);
15912 			scalar_min_max_add(reg, &fake_reg);
15913 			reg->var_off = tnum_add(reg->var_off, fake_reg.var_off);
15914 			if ((reg->id | known_reg->id) & BPF_ADD_CONST32)
15915 				zext_32_to_64(reg);
15916 			reg_bounds_sync(reg);
15917 		}
15918 		if (e->is_reg)
15919 			mark_reg_scratched(env, e->regno);
15920 		else
15921 			mark_stack_slot_scratched(env, e->spi);
15922 	}
15923 }
15924 
/* Verify a conditional jump instruction of class BPF_JMP or BPF_JMP32.
 *
 * @env:      verifier environment; env->cur_state is the state being verified
 * @insn:     the jump instruction
 * @insn_idx: in/out index of @insn. It is advanced by insn->off when the
 *            current state continues at the jump target (BPF_JCOND, or a
 *            branch known to be always taken) and left unchanged otherwise.
 *
 * Outline:
 *  - BPF_JCOND: queue the fall-through instruction as a new state and follow
 *    the jump in the current state.
 *  - Otherwise validate the operands and ask is_branch_taken() whether the
 *    outcome is known. If it is, only that path is followed (the other one
 *    may still be pushed for speculative-execution simulation).
 *  - If the outcome is unknown, push a state for the jump target, install the
 *    per-branch register copies from @env in both states, propagate them to
 *    linked scalars, and apply pointer nullness / packet pointer knowledge.
 *
 * Returns 0 on success or a negative error code.
 */
static int check_cond_jmp_op(struct bpf_verifier_env *env,
			     struct bpf_insn *insn, int *insn_idx)
{
	struct bpf_verifier_state *this_branch = env->cur_state;
	struct bpf_verifier_state *other_branch;
	struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs;
	struct bpf_reg_state *dst_reg, *other_branch_regs, *src_reg = NULL;
	struct bpf_reg_state *eq_branch_regs;
	struct linked_regs linked_regs = {};
	u8 opcode = BPF_OP(insn->code);
	int insn_flags = 0;
	bool is_jmp32;
	int pred = -1;
	int err;

	/* Only conditional jumps are expected to reach here. */
	if (opcode == BPF_JA || opcode > BPF_JCOND) {
		verbose(env, "invalid BPF_JMP/JMP32 opcode %x\n", opcode);
		return -EINVAL;
	}

	if (opcode == BPF_JCOND) {
		struct bpf_verifier_state *cur_st = env->cur_state, *queued_st, *prev_st;
		int idx = *insn_idx;

		prev_st = find_prev_entry(env, cur_st->parent, idx);

		/* branch out 'fallthrough' insn as a new state to explore */
		queued_st = push_stack(env, idx + 1, idx, false);
		if (IS_ERR(queued_st))
			return PTR_ERR(queued_st);

		queued_st->may_goto_depth++;
		if (prev_st)
			widen_imprecise_scalars(env, prev_st, queued_st);
		/* the current state follows the jump */
		*insn_idx += insn->off;
		return 0;
	}

	/* check the dst_reg operand, which a jump only reads (hence SRC_OP) */
	err = check_reg_arg(env, insn->dst_reg, SRC_OP);
	if (err)
		return err;

	dst_reg = &regs[insn->dst_reg];
	if (BPF_SRC(insn->code) == BPF_X) {
		/* check the src_reg operand */
		err = check_reg_arg(env, insn->src_reg, SRC_OP);
		if (err)
			return err;

		src_reg = &regs[insn->src_reg];
		if (!(reg_is_pkt_pointer_any(dst_reg) && reg_is_pkt_pointer_any(src_reg)) &&
		    is_pointer_value(env, insn->src_reg)) {
			verbose(env, "R%d pointer comparison prohibited\n",
				insn->src_reg);
			return -EACCES;
		}

		if (src_reg->type == PTR_TO_STACK)
			insn_flags |= INSN_F_SRC_REG_STACK;
		if (dst_reg->type == PTR_TO_STACK)
			insn_flags |= INSN_F_DST_REG_STACK;
	} else {
		/* Immediate operand: model it as a known scalar in a scratch
		 * register so that the code below can treat both forms alike.
		 */
		src_reg = &env->fake_reg[0];
		memset(src_reg, 0, sizeof(*src_reg));
		src_reg->type = SCALAR_VALUE;
		__mark_reg_known(src_reg, insn->imm);

		if (dst_reg->type == PTR_TO_STACK)
			insn_flags |= INSN_F_DST_REG_STACK;
	}

	if (insn_flags) {
		err = bpf_push_jmp_history(env, this_branch, insn_flags, 0, 0, 0);
		if (err)
			return err;
	}

	is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
	/* Seed the per-branch register copies with the current operand state.
	 * NOTE(review): they are presumably refined for the true/false outcome
	 * inside is_branch_taken(); that refinement is not visible in this
	 * function -- confirm.
	 */
	env->false_reg1 = *dst_reg;
	env->false_reg2 = *src_reg;
	env->true_reg1 = *dst_reg;
	env->true_reg2 = *src_reg;
	pred = is_branch_taken(env, dst_reg, src_reg, opcode, is_jmp32);
	if (pred >= 0) {
		/* If we get here with a dst_reg pointer type it is because
		 * above is_branch_taken() special cased the 0 comparison.
		 */
		if (!__is_pointer_value(false, dst_reg))
			err = mark_chain_precision(env, insn->dst_reg);
		if (BPF_SRC(insn->code) == BPF_X && !err &&
		    !__is_pointer_value(false, src_reg))
			err = mark_chain_precision(env, insn->src_reg);
		if (err)
			return err;
	}

	if (pred == 1) {
		/* Only follow the goto, ignore fall-through. If needed, push
		 * the fall-through branch for simulation under speculative
		 * execution.
		 */
		if (!env->bypass_spec_v1) {
			err = sanitize_speculative_path(env, insn, *insn_idx + 1, *insn_idx);
			if (err < 0)
				return err;
		}
		if (env->log.level & BPF_LOG_LEVEL)
			print_insn_state(env, this_branch, this_branch->curframe);
		*insn_idx += insn->off;
		return 0;
	} else if (pred == 0) {
		/* Only follow the fall-through branch, since that's where the
		 * program will go. If needed, push the goto branch for
		 * simulation under speculative execution.
		 */
		if (!env->bypass_spec_v1) {
			err = sanitize_speculative_path(env, insn, *insn_idx + insn->off + 1,
							*insn_idx);
			if (err < 0)
				return err;
		}
		if (env->log.level & BPF_LOG_LEVEL)
			print_insn_state(env, this_branch, this_branch->curframe);
		return 0;
	}

	/* Push scalar registers sharing same ID to jump history,
	 * do this before creating 'other_branch', so that both
	 * 'this_branch' and 'other_branch' share this history
	 * if parent state is created.
	 */
	if (BPF_SRC(insn->code) == BPF_X && src_reg->type == SCALAR_VALUE && src_reg->id)
		collect_linked_regs(env, this_branch, src_reg->id, &linked_regs);
	if (dst_reg->type == SCALAR_VALUE && dst_reg->id)
		collect_linked_regs(env, this_branch, dst_reg->id, &linked_regs);
	if (linked_regs.cnt > 1) {
		err = bpf_push_jmp_history(env, this_branch, 0, 0, 0, linked_regs_pack(&linked_regs));
		if (err)
			return err;
	}

	/* Outcome unknown: queue the jump target as a separate state. */
	other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx, false);
	if (IS_ERR(other_branch))
		return PTR_ERR(other_branch);
	other_branch_regs = other_branch->frame[other_branch->curframe]->regs;

	err = regs_bounds_sanity_check_branches(env);
	if (err)
		return err;

	/* The current state carries on with the false-branch registers, the
	 * queued jump-target state gets the true-branch ones. For an immediate
	 * operand src_reg is the scratch register, so only dst_reg matters in
	 * the queued state.
	 */
	*dst_reg = env->false_reg1;
	*src_reg = env->false_reg2;
	other_branch_regs[insn->dst_reg] = env->true_reg1;
	if (BPF_SRC(insn->code) == BPF_X)
		other_branch_regs[insn->src_reg] = env->true_reg2;

	/* Propagate the updated operands to the scalars linked to them, in
	 * both states.
	 */
	if (BPF_SRC(insn->code) == BPF_X &&
	    src_reg->type == SCALAR_VALUE && src_reg->id &&
	    !WARN_ON_ONCE(src_reg->id != other_branch_regs[insn->src_reg].id)) {
		sync_linked_regs(env, this_branch, src_reg, &linked_regs);
		sync_linked_regs(env, other_branch, &other_branch_regs[insn->src_reg],
				 &linked_regs);
	}
	if (dst_reg->type == SCALAR_VALUE && dst_reg->id &&
	    !WARN_ON_ONCE(dst_reg->id != other_branch_regs[insn->dst_reg].id)) {
		sync_linked_regs(env, this_branch, dst_reg, &linked_regs);
		sync_linked_regs(env, other_branch, &other_branch_regs[insn->dst_reg],
				 &linked_regs);
	}

	/* if one pointer register is compared to another pointer
	 * register check if PTR_MAYBE_NULL could be lifted.
	 * E.g. register A - maybe null
	 *      register B - not null
	 * for JNE A, B, ... - A is not null in the false branch;
	 * for JEQ A, B, ... - A is not null in the true branch.
	 *
	 * Since PTR_TO_BTF_ID points to a kernel struct that does
	 * not need to be null checked by the BPF program, i.e.,
	 * could be null even without PTR_MAYBE_NULL marking, so
	 * only propagate nullness when neither reg is that type.
	 */
	if (!is_jmp32 && BPF_SRC(insn->code) == BPF_X &&
	    __is_pointer_value(false, src_reg) && __is_pointer_value(false, dst_reg) &&
	    type_may_be_null(src_reg->type) != type_may_be_null(dst_reg->type) &&
	    base_type(src_reg->type) != PTR_TO_BTF_ID &&
	    base_type(dst_reg->type) != PTR_TO_BTF_ID) {
		/* pick the state in which the two pointers compare equal */
		eq_branch_regs = NULL;
		switch (opcode) {
		case BPF_JEQ:
			eq_branch_regs = other_branch_regs;
			break;
		case BPF_JNE:
			eq_branch_regs = regs;
			break;
		default:
			/* do nothing */
			break;
		}
		if (eq_branch_regs) {
			if (type_may_be_null(src_reg->type))
				mark_ptr_not_null_reg(&eq_branch_regs[insn->src_reg]);
			else
				mark_ptr_not_null_reg(&eq_branch_regs[insn->dst_reg]);
		}
	}

	/* detect if R == 0 where R is returned from bpf_map_lookup_elem().
	 * Also does the same detection for a register whose value is
	 * known to be 0.
	 * NOTE: these optimizations below are related with pointer comparison
	 *       which will never be JMP32.
	 */
	if (!is_jmp32 && (opcode == BPF_JEQ || opcode == BPF_JNE) &&
	    type_may_be_null(dst_reg->type) &&
	    ((BPF_SRC(insn->code) == BPF_K && insn->imm == 0) ||
	     (BPF_SRC(insn->code) == BPF_X && bpf_register_is_null(src_reg)))) {
		/* Mark all identical registers in each branch as either
		 * safe or unknown depending R == 0 or R != 0 conditional.
		 */
		mark_ptr_or_null_regs(this_branch, insn->dst_reg,
				      opcode == BPF_JNE);
		mark_ptr_or_null_regs(other_branch, insn->dst_reg,
				      opcode == BPF_JEQ);
	} else if (!try_match_pkt_pointers(insn, dst_reg, &regs[insn->src_reg],
					   this_branch, other_branch) &&
		   is_pointer_value(env, insn->dst_reg)) {
		/* Any other comparison involving a pointer in dst_reg is
		 * rejected when is_pointer_value() reports it as a pointer.
		 */
		verbose(env, "R%d pointer comparison prohibited\n",
			insn->dst_reg);
		return -EACCES;
	}
	if (env->log.level & BPF_LOG_LEVEL)
		print_insn_state(env, this_branch, this_branch->curframe);
	return 0;
}
16162 
16163 /* verify BPF_LD_IMM64 instruction */
16164 static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
16165 {
16166 	struct bpf_insn_aux_data *aux = cur_aux(env);
16167 	struct bpf_reg_state *regs = cur_regs(env);
16168 	struct bpf_reg_state *dst_reg;
16169 	struct bpf_map *map;
16170 	int err;
16171 
16172 	if (BPF_SIZE(insn->code) != BPF_DW) {
16173 		verbose(env, "invalid BPF_LD_IMM insn\n");
16174 		return -EINVAL;
16175 	}
16176 
16177 	err = check_reg_arg(env, insn->dst_reg, DST_OP);
16178 	if (err)
16179 		return err;
16180 
16181 	dst_reg = &regs[insn->dst_reg];
16182 	if (insn->src_reg == 0) {
16183 		u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
16184 
16185 		dst_reg->type = SCALAR_VALUE;
16186 		__mark_reg_known(&regs[insn->dst_reg], imm);
16187 		return 0;
16188 	}
16189 
16190 	/* All special src_reg cases are listed below. From this point onwards
16191 	 * we either succeed and assign a corresponding dst_reg->type after
16192 	 * zeroing the offset, or fail and reject the program.
16193 	 */
16194 	mark_reg_known_zero(env, regs, insn->dst_reg);
16195 
16196 	if (insn->src_reg == BPF_PSEUDO_BTF_ID) {
16197 		dst_reg->type = aux->btf_var.reg_type;
16198 		switch (base_type(dst_reg->type)) {
16199 		case PTR_TO_MEM:
16200 			dst_reg->mem_size = aux->btf_var.mem_size;
16201 			break;
16202 		case PTR_TO_BTF_ID:
16203 			dst_reg->btf = aux->btf_var.btf;
16204 			dst_reg->btf_id = aux->btf_var.btf_id;
16205 			break;
16206 		default:
16207 			verifier_bug(env, "pseudo btf id: unexpected dst reg type");
16208 			return -EFAULT;
16209 		}
16210 		return 0;
16211 	}
16212 
16213 	if (insn->src_reg == BPF_PSEUDO_FUNC) {
16214 		struct bpf_prog_aux *aux = env->prog->aux;
16215 		u32 subprogno = bpf_find_subprog(env,
16216 						 env->insn_idx + insn->imm + 1);
16217 
16218 		if (!aux->func_info) {
16219 			verbose(env, "missing btf func_info\n");
16220 			return -EINVAL;
16221 		}
16222 		if (aux->func_info_aux[subprogno].linkage != BTF_FUNC_STATIC) {
16223 			verbose(env, "callback function not static\n");
16224 			return -EINVAL;
16225 		}
16226 
16227 		dst_reg->type = PTR_TO_FUNC;
16228 		dst_reg->subprogno = subprogno;
16229 		return 0;
16230 	}
16231 
16232 	map = env->used_maps[aux->map_index];
16233 
16234 	if (insn->src_reg == BPF_PSEUDO_MAP_VALUE ||
16235 	    insn->src_reg == BPF_PSEUDO_MAP_IDX_VALUE) {
16236 		if (map->map_type == BPF_MAP_TYPE_ARENA) {
16237 			__mark_reg_unknown(env, dst_reg);
16238 			dst_reg->map_ptr = map;
16239 			return 0;
16240 		}
16241 		__mark_reg_known(dst_reg, aux->map_off);
16242 		dst_reg->type = PTR_TO_MAP_VALUE;
16243 		dst_reg->map_ptr = map;
16244 		WARN_ON_ONCE(map->map_type != BPF_MAP_TYPE_INSN_ARRAY &&
16245 			     map->max_entries != 1);
16246 		/* We want reg->id to be same (0) as map_value is not distinct */
16247 	} else if (insn->src_reg == BPF_PSEUDO_MAP_FD ||
16248 		   insn->src_reg == BPF_PSEUDO_MAP_IDX) {
16249 		dst_reg->type = CONST_PTR_TO_MAP;
16250 		dst_reg->map_ptr = map;
16251 	} else {
16252 		verifier_bug(env, "unexpected src reg value for ldimm64");
16253 		return -EFAULT;
16254 	}
16255 
16256 	return 0;
16257 }
16258 
16259 static bool may_access_skb(enum bpf_prog_type type)
16260 {
16261 	switch (type) {
16262 	case BPF_PROG_TYPE_SOCKET_FILTER:
16263 	case BPF_PROG_TYPE_SCHED_CLS:
16264 	case BPF_PROG_TYPE_SCHED_ACT:
16265 		return true;
16266 	default:
16267 		return false;
16268 	}
16269 }
16270 
16271 /* verify safety of LD_ABS|LD_IND instructions:
16272  * - they can only appear in the programs where ctx == skb
16273  * - since they are wrappers of function calls, they scratch R1-R5 registers,
16274  *   preserve R6-R9, and store return value into R0
16275  *
16276  * Implicit input:
16277  *   ctx == skb == R6 == CTX
16278  *
16279  * Explicit input:
16280  *   SRC == any register
16281  *   IMM == 32-bit immediate
16282  *
16283  * Output:
16284  *   R0 - 8/16/32-bit skb data converted to cpu endianness
16285  */
16286 static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
16287 {
16288 	struct bpf_reg_state *regs = cur_regs(env);
16289 	static const int ctx_reg = BPF_REG_6;
16290 	u8 mode = BPF_MODE(insn->code);
16291 	int i, err;
16292 
16293 	if (!may_access_skb(resolve_prog_type(env->prog))) {
16294 		verbose(env, "BPF_LD_[ABS|IND] instructions not allowed for this program type\n");
16295 		return -EINVAL;
16296 	}
16297 
16298 	if (!env->ops->gen_ld_abs) {
16299 		verifier_bug(env, "gen_ld_abs is null");
16300 		return -EFAULT;
16301 	}
16302 
16303 	/* check whether implicit source operand (register R6) is readable */
16304 	err = check_reg_arg(env, ctx_reg, SRC_OP);
16305 	if (err)
16306 		return err;
16307 
16308 	/* Disallow usage of BPF_LD_[ABS|IND] with reference tracking, as
16309 	 * gen_ld_abs() may terminate the program at runtime, leading to
16310 	 * reference leak.
16311 	 */
16312 	err = check_resource_leak(env, false, true, "BPF_LD_[ABS|IND]");
16313 	if (err)
16314 		return err;
16315 
16316 	if (regs[ctx_reg].type != PTR_TO_CTX) {
16317 		verbose(env,
16318 			"at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
16319 		return -EINVAL;
16320 	}
16321 
16322 	if (mode == BPF_IND) {
16323 		/* check explicit source operand */
16324 		err = check_reg_arg(env, insn->src_reg, SRC_OP);
16325 		if (err)
16326 			return err;
16327 	}
16328 
16329 	err = check_ptr_off_reg(env, &regs[ctx_reg], ctx_reg);
16330 	if (err < 0)
16331 		return err;
16332 
16333 	/* reset caller saved regs to unreadable */
16334 	for (i = 0; i < CALLER_SAVED_REGS; i++) {
16335 		bpf_mark_reg_not_init(env, &regs[caller_saved[i]]);
16336 		check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
16337 	}
16338 
16339 	/* mark destination R0 register as readable, since it contains
16340 	 * the value fetched from the packet.
16341 	 * Already marked as written above.
16342 	 */
16343 	mark_reg_unknown(env, regs, BPF_REG_0);
16344 	/* ld_abs load up to 32-bit skb data. */
16345 	regs[BPF_REG_0].subreg_def = env->insn_idx + 1;
16346 	/*
16347 	 * See bpf_gen_ld_abs() which emits a hidden BPF_EXIT with r0=0
16348 	 * which must be explored by the verifier when in a subprog.
16349 	 */
16350 	if (env->cur_state->curframe) {
16351 		struct bpf_verifier_state *branch;
16352 
16353 		mark_reg_scratched(env, BPF_REG_0);
16354 		branch = push_stack(env, env->insn_idx + 1, env->insn_idx, false);
16355 		if (IS_ERR(branch))
16356 			return PTR_ERR(branch);
16357 		mark_reg_known_zero(env, regs, BPF_REG_0);
16358 		err = prepare_func_exit(env, &env->insn_idx);
16359 		if (err)
16360 			return err;
16361 		env->insn_idx--;
16362 	}
16363 	return 0;
16364 }
16365 
16366 
16367 static bool return_retval_range(struct bpf_verifier_env *env, struct bpf_retval_range *range)
16368 {
16369 	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
16370 
16371 	/* Default return value range. */
16372 	*range = retval_range(0, 1);
16373 
16374 	switch (prog_type) {
16375 	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
16376 		switch (env->prog->expected_attach_type) {
16377 		case BPF_CGROUP_UDP4_RECVMSG:
16378 		case BPF_CGROUP_UDP6_RECVMSG:
16379 		case BPF_CGROUP_UNIX_RECVMSG:
16380 		case BPF_CGROUP_INET4_GETPEERNAME:
16381 		case BPF_CGROUP_INET6_GETPEERNAME:
16382 		case BPF_CGROUP_UNIX_GETPEERNAME:
16383 		case BPF_CGROUP_INET4_GETSOCKNAME:
16384 		case BPF_CGROUP_INET6_GETSOCKNAME:
16385 		case BPF_CGROUP_UNIX_GETSOCKNAME:
16386 			*range = retval_range(1, 1);
16387 			break;
16388 		case BPF_CGROUP_INET4_BIND:
16389 		case BPF_CGROUP_INET6_BIND:
16390 			*range = retval_range(0, 3);
16391 			break;
16392 		default:
16393 			break;
16394 		}
16395 		break;
16396 	case BPF_PROG_TYPE_CGROUP_SKB:
16397 		if (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS)
16398 			*range = retval_range(0, 3);
16399 		break;
16400 	case BPF_PROG_TYPE_CGROUP_SOCK:
16401 	case BPF_PROG_TYPE_SOCK_OPS:
16402 	case BPF_PROG_TYPE_CGROUP_DEVICE:
16403 	case BPF_PROG_TYPE_CGROUP_SYSCTL:
16404 	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
16405 		break;
16406 	case BPF_PROG_TYPE_RAW_TRACEPOINT:
16407 		if (!env->prog->aux->attach_btf_id)
16408 			return false;
16409 		*range = retval_range(0, 0);
16410 		break;
16411 	case BPF_PROG_TYPE_TRACING:
16412 		switch (env->prog->expected_attach_type) {
16413 		case BPF_TRACE_FENTRY:
16414 		case BPF_TRACE_FEXIT:
16415 		case BPF_TRACE_FSESSION:
16416 		case BPF_TRACE_FENTRY_MULTI:
16417 		case BPF_TRACE_FEXIT_MULTI:
16418 		case BPF_TRACE_FSESSION_MULTI:
16419 			*range = retval_range(0, 0);
16420 			break;
16421 		case BPF_TRACE_RAW_TP:
16422 		case BPF_MODIFY_RETURN:
16423 			return false;
16424 		case BPF_TRACE_ITER:
16425 		default:
16426 			break;
16427 		}
16428 		break;
16429 	case BPF_PROG_TYPE_KPROBE:
16430 		switch (env->prog->expected_attach_type) {
16431 		case BPF_TRACE_KPROBE_SESSION:
16432 		case BPF_TRACE_UPROBE_SESSION:
16433 			break;
16434 		default:
16435 			return false;
16436 		}
16437 		break;
16438 	case BPF_PROG_TYPE_SK_LOOKUP:
16439 		*range = retval_range(SK_DROP, SK_PASS);
16440 		break;
16441 
16442 	case BPF_PROG_TYPE_LSM:
16443 		if (env->prog->expected_attach_type != BPF_LSM_CGROUP) {
16444 			/* no range found, any return value is allowed */
16445 			if (!get_func_retval_range(env->prog, range))
16446 				return false;
16447 			/* no restricted range, any return value is allowed */
16448 			if (range->minval == S32_MIN && range->maxval == S32_MAX)
16449 				return false;
16450 			range->return_32bit = true;
16451 		} else if (!env->prog->aux->attach_func_proto->type) {
16452 			/* Make sure programs that attach to void
16453 			 * hooks don't try to modify return value.
16454 			 */
16455 			*range = retval_range(1, 1);
16456 		}
16457 		break;
16458 
16459 	case BPF_PROG_TYPE_NETFILTER:
16460 		*range = retval_range(NF_DROP, NF_ACCEPT);
16461 		break;
16462 	case BPF_PROG_TYPE_STRUCT_OPS:
16463 		*range = retval_range(0, 0);
16464 		break;
16465 	case BPF_PROG_TYPE_EXT:
16466 		/* freplace program can return anything as its return value
16467 		 * depends on the to-be-replaced kernel func or bpf program.
16468 		 */
16469 	default:
16470 		return false;
16471 	}
16472 
16473 	/* Continue calculating. */
16474 
16475 	return true;
16476 }
16477 
16478 static bool program_returns_void(struct bpf_verifier_env *env)
16479 {
16480 	const struct bpf_prog *prog = env->prog;
16481 	enum bpf_prog_type prog_type = prog->type;
16482 
16483 	switch (prog_type) {
16484 	case BPF_PROG_TYPE_LSM:
16485 		/* See return_retval_range, for BPF_LSM_CGROUP can be 0 or 0-1 depending on hook. */
16486 		if (prog->expected_attach_type != BPF_LSM_CGROUP &&
16487 		    !prog->aux->attach_func_proto->type)
16488 			return true;
16489 		break;
16490 	case BPF_PROG_TYPE_STRUCT_OPS:
16491 		if (!prog->aux->attach_func_proto->type)
16492 			return true;
16493 		break;
16494 	case BPF_PROG_TYPE_EXT:
16495 		/*
16496 		 * If the actual program is an extension, let it
16497 		 * return void - attaching will succeed only if the
16498 		 * program being replaced also returns void, and since
16499 		 * it has passed verification its actual type doesn't matter.
16500 		 */
16501 		if (subprog_returns_void(env, 0))
16502 			return true;
16503 		break;
16504 	default:
16505 		break;
16506 	}
16507 	return false;
16508 }
16509 
/*
 * Validate the value held in register @regno when frame[0] of the current
 * state exits (main program exit or async callback return).
 *
 * @env:      verifier environment
 * @regno:    register carrying the return value
 * @reg_name: register name handed to verbose_invalid_scalar() for the
 *            out-of-range diagnostic
 *
 * Returns 0 when the value is acceptable (or not restricted at all),
 * -EACCES when the register would leak a pointer, -EINVAL when it is not a
 * scalar or falls outside the allowed range, or an error propagated from
 * check_reg_arg(), mark_chain_precision() or __check_ptr_off_reg().
 *
 * Side effect: may set env->prog->enforce_expected_attach_type (see the
 * end of the function).
 */
static int check_return_code(struct bpf_verifier_env *env, int regno, const char *reg_name)
{
	const char *exit_ctx = "At program exit";
	struct tnum enforce_attach_type_range = tnum_unknown;
	const struct bpf_prog *prog = env->prog;
	struct bpf_reg_state *reg = reg_state(env, regno);
	struct bpf_retval_range range = retval_range(0, 1);
	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
	struct bpf_func_state *frame = env->cur_state->frame[0];
	const struct btf_type *reg_type, *ret_type = NULL;
	int err;

	/* LSM and struct_ops func-ptr's return type could be "void" */
	if (!frame->in_async_callback_fn && program_returns_void(env))
		return 0;

	if (prog_type == BPF_PROG_TYPE_STRUCT_OPS) {
		/* Allow a struct_ops program to return a referenced kptr if it
		 * matches the operator's return type and is in its unmodified
		 * form. A scalar zero (i.e., a null pointer) is also allowed.
		 */
		reg_type = reg->btf ? btf_type_by_id(reg->btf, reg->btf_id) : NULL;
		ret_type = btf_type_resolve_ptr(prog->aux->attach_btf,
						prog->aux->attach_func_proto->type,
						NULL);
		if (ret_type && ret_type == reg_type && reg_is_referenced(env, reg))
			return __check_ptr_off_reg(env, reg, argno_from_reg(regno), false);
	}

	/* eBPF calling convention is such that R0 is used
	 * to return the value from eBPF program.
	 * Make sure that it's readable at this time
	 * of bpf_exit, which means that program wrote
	 * something into it earlier
	 */
	err = check_reg_arg(env, regno, SRC_OP);
	if (err)
		return err;

	if (is_pointer_value(env, regno)) {
		verbose(env, "R%d leaks addr as return value\n", regno);
		return -EACCES;
	}

	/* Async callbacks carry their own range; skip the per-type lookup. */
	if (frame->in_async_callback_fn) {
		exit_ctx = "At async callback return";
		range = frame->callback_ret_range;
		goto enforce_retval;
	}

	/* ret_type is NULL here when btf_type_resolve_ptr() found no pointer return type */
	if (prog_type == BPF_PROG_TYPE_STRUCT_OPS && !ret_type)
		return 0;

	/* Remember to test below whether the value lies within tnum_range(2, 3). */
	if (prog_type == BPF_PROG_TYPE_CGROUP_SKB && (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS))
		enforce_attach_type_range = tnum_range(2, 3);

	/* false means the return value is unrestricted for this program */
	if (!return_retval_range(env, &range))
		return 0;

enforce_retval:
	if (reg->type != SCALAR_VALUE) {
		verbose(env, "%s the register R%d is not a known value (%s)\n",
			exit_ctx, regno, reg_type_str(env, reg->type));
		return -EINVAL;
	}

	err = mark_chain_precision(env, regno);
	if (err)
		return err;

	if (!retval_range_within(range, reg)) {
		verbose_invalid_scalar(env, reg, range, exit_ctx, reg_name);
		/* extra hint for BPF_LSM_CGROUP programs attached to void hooks */
		if (prog->expected_attach_type == BPF_LSM_CGROUP &&
		    prog_type == BPF_PROG_TYPE_LSM &&
		    !prog->aux->attach_func_proto->type)
			verbose(env, "Note, BPF_LSM_CGROUP that attach to void LSM hooks can't modify return value!\n");
		return -EINVAL;
	}

	/* Only reached with a non-unknown tnum for CGROUP_SKB egress programs. */
	if (!tnum_is_unknown(enforce_attach_type_range) &&
	    tnum_in(enforce_attach_type_range, reg->var_off))
		env->prog->enforce_expected_attach_type = 1;
	return 0;
}
16594 
16595 static int check_global_subprog_return_code(struct bpf_verifier_env *env)
16596 {
16597 	struct bpf_reg_state *reg = reg_state(env, BPF_REG_0);
16598 	struct bpf_func_state *cur_frame = cur_func(env);
16599 	int err;
16600 
16601 	if (subprog_returns_void(env, cur_frame->subprogno))
16602 		return 0;
16603 
16604 	err = check_reg_arg(env, BPF_REG_0, SRC_OP);
16605 	if (err)
16606 		return err;
16607 
16608 	/* Pointers to arena are safe to pass between subprograms. */
16609 	if (is_arena_reg(env, BPF_REG_0))
16610 		return 0;
16611 
16612 	if (is_pointer_value(env, BPF_REG_0)) {
16613 		verbose(env, "R%d leaks addr as return value\n", BPF_REG_0);
16614 		return -EACCES;
16615 	}
16616 
16617 	if (reg->type != SCALAR_VALUE) {
16618 		verbose(env, "At subprogram exit the register R0 is not a scalar value (%s)\n",
16619 			reg_type_str(env, reg->type));
16620 		return -EINVAL;
16621 	}
16622 
16623 	return 0;
16624 }
16625 
/* Bitmask with 1s for all caller saved registers,
 * i.e. the low CALLER_SAVED_REGS bits are set.
 */
#define ALL_CALLER_SAVED_REGS ((1u << CALLER_SAVED_REGS) - 1)
16628 
/* True if do_misc_fixups() replaces calls to helper number 'imm',
 * replacement patch is presumed to follow bpf_fastcall contract
 * (see mark_fastcall_pattern_for_call() below).
 *
 * @env: verifier environment; only env->prog->jit_requested is read
 * @imm: helper function id (BPF_FUNC_*)
 *
 * As written, this can only return true on CONFIG_X86_64 builds, and for
 * the get_current_task{,_btf} helpers additionally only with CONFIG_SMP.
 * Even then JIT must have been requested for the program and
 * bpf_jit_supports_percpu_insn() must report true.
 */
bool bpf_verifier_inlines_helper_call(struct bpf_verifier_env *env, s32 imm)
{
	switch (imm) {
#ifdef CONFIG_X86_64
	case BPF_FUNC_get_smp_processor_id:
#ifdef CONFIG_SMP
	case BPF_FUNC_get_current_task_btf:
	case BPF_FUNC_get_current_task:
#endif
		return env->prog->jit_requested && bpf_jit_supports_percpu_insn();
#endif
	default:
		return false;
	}
}
16648 
16649 /* If @call is a kfunc or helper call, fills @cs and returns true,
16650  * otherwise returns false.
16651  */
16652 bool bpf_get_call_summary(struct bpf_verifier_env *env, struct bpf_insn *call,
16653 			  struct bpf_call_summary *cs)
16654 {
16655 	struct bpf_kfunc_call_arg_meta meta;
16656 	const struct bpf_func_proto *fn;
16657 	int i;
16658 
16659 	if (bpf_helper_call(call)) {
16660 
16661 		if (bpf_get_helper_proto(env, call->imm, &fn) < 0)
16662 			/* error would be reported later */
16663 			return false;
16664 		cs->fastcall = fn->allow_fastcall &&
16665 			       (bpf_verifier_inlines_helper_call(env, call->imm) ||
16666 				bpf_jit_inlines_helper_call(call->imm));
16667 		cs->is_void = fn->ret_type == RET_VOID;
16668 		cs->num_params = 0;
16669 		for (i = 0; i < ARRAY_SIZE(fn->arg_type); ++i) {
16670 			if (fn->arg_type[i] == ARG_DONTCARE)
16671 				break;
16672 			cs->num_params++;
16673 		}
16674 		return true;
16675 	}
16676 
16677 	if (bpf_pseudo_kfunc_call(call)) {
16678 		int err;
16679 
16680 		err = bpf_fetch_kfunc_arg_meta(env, call->imm, call->off, &meta);
16681 		if (err < 0)
16682 			/* error would be reported later */
16683 			return false;
16684 		cs->num_params = btf_type_vlen(meta.func_proto);
16685 		cs->fastcall = meta.kfunc_flags & KF_FASTCALL;
16686 		cs->is_void = btf_type_is_void(btf_type_by_id(meta.btf, meta.func_proto->type));
16687 		return true;
16688 	}
16689 
16690 	return false;
16691 }
16692 
/* LLVM defines a bpf_fastcall function attribute.
16694  * This attribute means that function scratches only some of
16695  * the caller saved registers defined by ABI.
16696  * For BPF the set of such registers could be defined as follows:
16697  * - R0 is scratched only if function is non-void;
16698  * - R1-R5 are scratched only if corresponding parameter type is defined
16699  *   in the function prototype.
16700  *
16701  * The contract between kernel and clang allows to simultaneously use
16702  * such functions and maintain backwards compatibility with old
16703  * kernels that don't understand bpf_fastcall calls:
16704  *
16705  * - for bpf_fastcall calls clang allocates registers as-if relevant r0-r5
16706  *   registers are not scratched by the call;
16707  *
16708  * - as a post-processing step, clang visits each bpf_fastcall call and adds
16709  *   spill/fill for every live r0-r5;
16710  *
16711  * - stack offsets used for the spill/fill are allocated as lowest
16712  *   stack offsets in whole function and are not used for any other
16713  *   purposes;
16714  *
16715  * - when kernel loads a program, it looks for such patterns
16716  *   (bpf_fastcall function surrounded by spills/fills) and checks if
16717  *   spill/fill stack offsets are used exclusively in fastcall patterns;
16718  *
16719  * - if so, and if verifier or current JIT inlines the call to the
16720  *   bpf_fastcall function (e.g. a helper call), kernel removes unnecessary
16721  *   spill/fill pairs;
16722  *
16723  * - when old kernel loads a program, presence of spill/fill pairs
16724  *   keeps BPF program valid, albeit slightly less efficient.
16725  *
16726  * For example:
16727  *
16728  *   r1 = 1;
16729  *   r2 = 2;
16730  *   *(u64 *)(r10 - 8)  = r1;            r1 = 1;
16731  *   *(u64 *)(r10 - 16) = r2;            r2 = 2;
16732  *   call %[to_be_inlined]         -->   call %[to_be_inlined]
16733  *   r2 = *(u64 *)(r10 - 16);            r0 = r1;
16734  *   r1 = *(u64 *)(r10 - 8);             r0 += r2;
16735  *   r0 = r1;                            exit;
16736  *   r0 += r2;
16737  *   exit;
16738  *
16739  * The purpose of mark_fastcall_pattern_for_call is to:
16740  * - look for such patterns;
16741  * - mark spill and fill instructions in env->insn_aux_data[*].fastcall_pattern;
 * - set env->insn_aux_data[*].fastcall_spills_num for call instruction;
16743  * - update env->subprog_info[*]->fastcall_stack_off to find an offset
16744  *   at which bpf_fastcall spill/fill stack slots start;
16745  * - update env->subprog_info[*]->keep_fastcall_stack.
16746  *
16747  * The .fastcall_pattern and .fastcall_stack_off are used by
16748  * check_fastcall_stack_contract() to check if every stack access to
16749  * fastcall spill/fill stack slot originates from spill/fill
16750  * instructions, members of fastcall patterns.
16751  *
16752  * If such condition holds true for a subprogram, fastcall patterns could
16753  * be rewritten by remove_fastcall_spills_fills().
16754  * Otherwise bpf_fastcall patterns are not changed in the subprogram
16755  * (code, presumably, generated by an older clang version).
16756  *
16757  * For example, it is *not* safe to remove spill/fill below:
16758  *
16759  *   r1 = 1;
16760  *   *(u64 *)(r10 - 8)  = r1;            r1 = 1;
16761  *   call %[to_be_inlined]         -->   call %[to_be_inlined]
16762  *   r1 = *(u64 *)(r10 - 8);             r0 = *(u64 *)(r10 - 8);  <---- wrong !!!
16763  *   r0 = *(u64 *)(r10 - 8);             r0 += r1;
16764  *   r0 += r1;                           exit;
16765  *   exit;
16766  */
/*
 * Look for spill/fill pairs surrounding the call at @insn_idx and record
 * them as a bpf_fastcall pattern.
 *
 * @env:        verifier environment
 * @subprog:    subprogram containing the call; its fastcall_stack_off and
 *              keep_fastcall_stack fields may be updated
 * @insn_idx:   index of the call instruction in env->prog->insnsi
 * @lowest_off: stack offset at which the first spill/fill pair is expected
 *
 * Does nothing if the call is neither a helper nor a kfunc call, or if no
 * spill/fill pair is found next to it.
 */
static void mark_fastcall_pattern_for_call(struct bpf_verifier_env *env,
					   struct bpf_subprog_info *subprog,
					   int insn_idx, s16 lowest_off)
{
	struct bpf_insn *insns = env->prog->insnsi, *stx, *ldx;
	struct bpf_insn *call = &env->prog->insnsi[insn_idx];
	u32 clobbered_regs_mask;
	struct bpf_call_summary cs;
	u32 expected_regs_mask;
	s16 off;
	int i;

	if (!bpf_get_call_summary(env, call, &cs))
		return;

	/* A bitmask specifying which caller saved registers are clobbered
	 * by a call to a helper/kfunc *as if* this helper/kfunc follows
	 * bpf_fastcall contract:
	 * - includes R0 if function is non-void;
	 * - includes R1-R5 if corresponding parameter is described
	 *   in the function prototype.
	 */
	clobbered_regs_mask = GENMASK(cs.num_params, cs.is_void ? 1 : 0);
	/* e.g. if helper call clobbers r{0,1}, expect r{2,3,4,5} in the pattern */
	expected_regs_mask = ~clobbered_regs_mask & ALL_CALLER_SAVED_REGS;

	/* match pairs of form:
	 *
	 * *(u64 *)(r10 - Y) = rX   (where Y % 8 == 0)
	 * ...
	 * call %[to_be_inlined]
	 * ...
	 * rX = *(u64 *)(r10 - Y)
	 *
	 * The i-th pair consists of the instructions i slots before and
	 * i slots after the call and must use offset
	 * lowest_off + (i - 1) * BPF_REG_SIZE.
	 */
	for (i = 1, off = lowest_off; i <= ARRAY_SIZE(caller_saved); ++i, off += BPF_REG_SIZE) {
		/* stay within the program's instruction array */
		if (insn_idx - i < 0 || insn_idx + i >= env->prog->len)
			break;
		stx = &insns[insn_idx - i];
		ldx = &insns[insn_idx + i];
		/* must be a stack spill/fill pair */
		if (stx->code != (BPF_STX | BPF_MEM | BPF_DW) ||
		    ldx->code != (BPF_LDX | BPF_MEM | BPF_DW) ||
		    stx->dst_reg != BPF_REG_10 ||
		    ldx->src_reg != BPF_REG_10)
			break;
		/* must be a spill/fill for the same reg */
		if (stx->src_reg != ldx->dst_reg)
			break;
		/* must be one of the previously unseen registers */
		if ((BIT(stx->src_reg) & expected_regs_mask) == 0)
			break;
		/* must be a spill/fill for the same expected offset,
		 * no need to check offset alignment, BPF_DW stack access
		 * is always 8-byte aligned.
		 */
		if (stx->off != off || ldx->off != off)
			break;
		expected_regs_mask &= ~BIT(stx->src_reg);
		env->insn_aux_data[insn_idx - i].fastcall_pattern = 1;
		env->insn_aux_data[insn_idx + i].fastcall_pattern = 1;
	}
	/* i == 1 means that not even the innermost pair matched */
	if (i == 1)
		return;

	/* Conditionally set 'fastcall_spills_num' to allow forward
	 * compatibility when more helper functions are marked as
	 * bpf_fastcall at compile time than current kernel supports, e.g:
	 *
	 *   1: *(u64 *)(r10 - 8) = r1
	 *   2: call A                  ;; assume A is bpf_fastcall for current kernel
	 *   3: r1 = *(u64 *)(r10 - 8)
	 *   4: *(u64 *)(r10 - 8) = r1
	 *   5: call B                  ;; assume B is not bpf_fastcall for current kernel
	 *   6: r1 = *(u64 *)(r10 - 8)
	 *
	 * There is no need to block bpf_fastcall rewrite for such program.
	 * Set 'fastcall_pattern' for both calls to keep check_fastcall_stack_contract() happy,
	 * don't set 'fastcall_spills_num' for call B so that remove_fastcall_spills_fills()
	 * does not remove spill/fill pair {4,6}.
	 */
	if (cs.fastcall)
		env->insn_aux_data[insn_idx].fastcall_spills_num = i - 1;
	else
		subprog->keep_fastcall_stack = 1;
	/* here off == lowest_off + (i - 1) * BPF_REG_SIZE, i - 1 being the number of matched pairs */
	subprog->fastcall_stack_off = min(subprog->fastcall_stack_off, off);
}
16853 
16854 static int mark_fastcall_patterns(struct bpf_verifier_env *env)
16855 {
16856 	struct bpf_subprog_info *subprog = env->subprog_info;
16857 	struct bpf_insn *insn;
16858 	s16 lowest_off;
16859 	int s, i;
16860 
16861 	for (s = 0; s < env->subprog_cnt; ++s, ++subprog) {
16862 		/* find lowest stack spill offset used in this subprog */
16863 		lowest_off = 0;
16864 		for (i = subprog->start; i < (subprog + 1)->start; ++i) {
16865 			insn = env->prog->insnsi + i;
16866 			if (insn->code != (BPF_STX | BPF_MEM | BPF_DW) ||
16867 			    insn->dst_reg != BPF_REG_10)
16868 				continue;
16869 			lowest_off = min(lowest_off, insn->off);
16870 		}
16871 		/* use this offset to find fastcall patterns */
16872 		for (i = subprog->start; i < (subprog + 1)->start; ++i) {
16873 			insn = env->prog->insnsi + i;
16874 			if (insn->code != (BPF_JMP | BPF_CALL))
16875 				continue;
16876 			mark_fastcall_pattern_for_call(env, subprog, i, lowest_off);
16877 		}
16878 	}
16879 	return 0;
16880 }
16881 
16882 static void adjust_btf_func(struct bpf_verifier_env *env)
16883 {
16884 	struct bpf_prog_aux *aux = env->prog->aux;
16885 	int i;
16886 
16887 	if (!aux->func_info)
16888 		return;
16889 
16890 	/* func_info is not available for hidden subprogs */
16891 	for (i = 0; i < env->subprog_cnt - env->hidden_subprog_cnt; i++)
16892 		aux->func_info[i].insn_off = env->subprog_info[i].start;
16893 }
16894 
16895 /* Find id in idset and increment its count, or add new entry */
16896 static void idset_cnt_inc(struct bpf_idset *idset, u32 id)
16897 {
16898 	u32 i;
16899 
16900 	for (i = 0; i < idset->num_ids; i++) {
16901 		if (idset->entries[i].id == id) {
16902 			idset->entries[i].cnt++;
16903 			return;
16904 		}
16905 	}
16906 	/* New id */
16907 	if (idset->num_ids < BPF_ID_MAP_SIZE) {
16908 		idset->entries[idset->num_ids].id = id;
16909 		idset->entries[idset->num_ids].cnt = 1;
16910 		idset->num_ids++;
16911 	}
16912 }
16913 
16914 /* Find id in idset and return its count, or 0 if not found */
16915 static u32 idset_cnt_get(struct bpf_idset *idset, u32 id)
16916 {
16917 	u32 i;
16918 
16919 	for (i = 0; i < idset->num_ids; i++) {
16920 		if (idset->entries[i].id == id)
16921 			return idset->entries[i].cnt;
16922 	}
16923 	return 0;
16924 }
16925 
16926 /*
16927  * Clear singular scalar ids in a state.
16928  * A register with a non-zero id is called singular if no other register shares
16929  * the same base id. Such registers can be treated as independent (id=0).
16930  */
16931 void bpf_clear_singular_ids(struct bpf_verifier_env *env,
16932 			    struct bpf_verifier_state *st)
16933 {
16934 	struct bpf_idset *idset = &env->idset_scratch;
16935 	struct bpf_func_state *func;
16936 	struct bpf_reg_state *reg;
16937 
16938 	idset->num_ids = 0;
16939 
16940 	bpf_for_each_reg_in_vstate(st, func, reg, ({
16941 		if (reg->type != SCALAR_VALUE)
16942 			continue;
16943 		if (!reg->id)
16944 			continue;
16945 		idset_cnt_inc(idset, reg->id & ~BPF_ADD_CONST);
16946 	}));
16947 
16948 	bpf_for_each_reg_in_vstate(st, func, reg, ({
16949 		if (reg->type != SCALAR_VALUE)
16950 			continue;
16951 		if (!reg->id)
16952 			continue;
16953 		if (idset_cnt_get(idset, reg->id & ~BPF_ADD_CONST) == 1)
16954 			clear_scalar_id(reg);
16955 	}));
16956 }
16957 
16958 /* Return true if it's OK to have the same insn return a different type. */
16959 static bool reg_type_mismatch_ok(enum bpf_reg_type type)
16960 {
16961 	switch (base_type(type)) {
16962 	case PTR_TO_CTX:
16963 	case PTR_TO_SOCKET:
16964 	case PTR_TO_SOCK_COMMON:
16965 	case PTR_TO_TCP_SOCK:
16966 	case PTR_TO_XDP_SOCK:
16967 	case PTR_TO_BTF_ID:
16968 	case PTR_TO_ARENA:
16969 		return false;
16970 	default:
16971 		return true;
16972 	}
16973 }
16974 
16975 /* If an instruction was previously used with particular pointer types, then we
16976  * need to be careful to avoid cases such as the below, where it may be ok
16977  * for one branch accessing the pointer, but not ok for the other branch:
16978  *
16979  * R1 = sock_ptr
16980  * goto X;
16981  * ...
16982  * R1 = some_other_valid_ptr;
16983  * goto X;
16984  * ...
16985  * R2 = *(u32 *)(R1 + 0);
16986  */
16987 static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev)
16988 {
16989 	return src != prev && (!reg_type_mismatch_ok(src) ||
16990 			       !reg_type_mismatch_ok(prev));
16991 }
16992 
16993 static bool is_ptr_to_mem_or_btf_id(enum bpf_reg_type type)
16994 {
16995 	switch (base_type(type)) {
16996 	case PTR_TO_MEM:
16997 	case PTR_TO_BTF_ID:
16998 		return true;
16999 	default:
17000 		return false;
17001 	}
17002 }
17003 
17004 static bool is_ptr_to_mem(enum bpf_reg_type type)
17005 {
17006 	return base_type(type) == PTR_TO_MEM;
17007 }
17008 
17009 static int save_aux_ptr_type(struct bpf_verifier_env *env, enum bpf_reg_type type,
17010 			     bool allow_trust_mismatch)
17011 {
17012 	enum bpf_reg_type *prev_type = &env->insn_aux_data[env->insn_idx].ptr_type;
17013 	enum bpf_reg_type merged_type;
17014 
17015 	if (*prev_type == NOT_INIT) {
17016 		/* Saw a valid insn
17017 		 * dst_reg = *(u32 *)(src_reg + off)
17018 		 * save type to validate intersecting paths
17019 		 */
17020 		*prev_type = type;
17021 	} else if (reg_type_mismatch(type, *prev_type)) {
17022 		/* Abuser program is trying to use the same insn
17023 		 * dst_reg = *(u32*) (src_reg + off)
17024 		 * with different pointer types:
17025 		 * src_reg == ctx in one branch and
17026 		 * src_reg == stack|map in some other branch.
17027 		 * Reject it.
17028 		 */
17029 		if (allow_trust_mismatch &&
17030 		    is_ptr_to_mem_or_btf_id(type) &&
17031 		    is_ptr_to_mem_or_btf_id(*prev_type)) {
17032 			/*
17033 			 * Have to support a use case when one path through
17034 			 * the program yields TRUSTED pointer while another
17035 			 * is UNTRUSTED. Fallback to UNTRUSTED to generate
17036 			 * BPF_PROBE_MEM/BPF_PROBE_MEMSX.
17037 			 * Same behavior of MEM_RDONLY flag.
17038 			 */
17039 			if (is_ptr_to_mem(type) || is_ptr_to_mem(*prev_type))
17040 				merged_type = PTR_TO_MEM;
17041 			else
17042 				merged_type = PTR_TO_BTF_ID;
17043 			if ((type & PTR_UNTRUSTED) || (*prev_type & PTR_UNTRUSTED))
17044 				merged_type |= PTR_UNTRUSTED;
17045 			if ((type & MEM_RDONLY) || (*prev_type & MEM_RDONLY))
17046 				merged_type |= MEM_RDONLY;
17047 			*prev_type = merged_type;
17048 		} else {
17049 			verbose(env, "same insn cannot be used with different pointers\n");
17050 			return -EINVAL;
17051 		}
17052 	}
17053 
17054 	return 0;
17055 }
17056 
/*
 * Positive status codes returned by do_check_insn() and its helpers, in
 * addition to 0 and negative errnos.
 * NOTE(review): how the caller reacts to each code is not visible in this
 * part of the file - confirm against do_check().
 */
enum {
	/* returned by process_bpf_exit_full() once an exit has been validated */
	PROCESS_BPF_EXIT = 1,
	/* returned when the handler has already set env->insn_idx itself */
	INSN_IDX_UPDATED = 2,
};
17061 
17062 static int process_bpf_exit_full(struct bpf_verifier_env *env,
17063 				 bool *do_print_state,
17064 				 bool exception_exit)
17065 {
17066 	struct bpf_func_state *cur_frame = cur_func(env);
17067 
17068 	/* We must do check_reference_leak here before
17069 	 * prepare_func_exit to handle the case when
17070 	 * state->curframe > 0, it may be a callback function,
17071 	 * for which reference_state must match caller reference
17072 	 * state when it exits.
17073 	 */
17074 	int err = check_resource_leak(env, exception_exit,
17075 				      exception_exit || !env->cur_state->curframe,
17076 				      exception_exit ? "bpf_throw" :
17077 				      "BPF_EXIT instruction in main prog");
17078 	if (err)
17079 		return err;
17080 
17081 	/* The side effect of the prepare_func_exit which is
17082 	 * being skipped is that it frees bpf_func_state.
17083 	 * Typically, process_bpf_exit will only be hit with
17084 	 * outermost exit. copy_verifier_state in pop_stack will
17085 	 * handle freeing of any extra bpf_func_state left over
17086 	 * from not processing all nested function exits. We
17087 	 * also skip return code checks as they are not needed
17088 	 * for exceptional exits.
17089 	 */
17090 	if (exception_exit)
17091 		return PROCESS_BPF_EXIT;
17092 
17093 	if (env->cur_state->curframe) {
17094 		/* exit from nested function */
17095 		err = prepare_func_exit(env, &env->insn_idx);
17096 		if (err)
17097 			return err;
17098 		*do_print_state = true;
17099 		return INSN_IDX_UPDATED;
17100 	}
17101 
17102 	/*
17103 	 * Return from a regular global subprogram differs from return
17104 	 * from the main program or async/exception callback.
17105 	 * Main program exit implies return code restrictions
17106 	 * that depend on program type.
17107 	 * Exit from exception callback is equivalent to main program exit.
17108 	 * Exit from async callback implies return code restrictions
17109 	 * that depend on async scheduling mechanism.
17110 	 */
17111 	if (cur_frame->subprogno &&
17112 	    !cur_frame->in_async_callback_fn &&
17113 	    !cur_frame->in_exception_callback_fn)
17114 		err = check_global_subprog_return_code(env);
17115 	else
17116 		err = check_return_code(env, BPF_REG_0, "R0");
17117 	if (err)
17118 		return err;
17119 	return PROCESS_BPF_EXIT;
17120 }
17121 
17122 static int indirect_jump_min_max_index(struct bpf_verifier_env *env,
17123 				       int regno,
17124 				       struct bpf_map *map,
17125 				       u32 *pmin_index, u32 *pmax_index)
17126 {
17127 	struct bpf_reg_state *reg = reg_state(env, regno);
17128 	u64 min_index = reg_umin(reg);
17129 	u64 max_index = reg_umax(reg);
17130 	const u32 size = 8;
17131 
17132 	if (min_index > (u64) U32_MAX * size) {
17133 		verbose(env, "the sum of R%u umin_value %llu is too big\n", regno, reg_umin(reg));
17134 		return -ERANGE;
17135 	}
17136 	if (max_index > (u64) U32_MAX * size) {
17137 		verbose(env, "the sum of R%u umax_value %llu is too big\n", regno, reg_umax(reg));
17138 		return -ERANGE;
17139 	}
17140 
17141 	min_index /= size;
17142 	max_index /= size;
17143 
17144 	if (max_index >= map->max_entries) {
17145 		verbose(env, "R%u points to outside of jump table: [%llu,%llu] max_entries %u\n",
17146 			     regno, min_index, max_index, map->max_entries);
17147 		return -EINVAL;
17148 	}
17149 
17150 	*pmin_index = min_index;
17151 	*pmax_index = max_index;
17152 	return 0;
17153 }
17154 
17155 /* gotox *dst_reg */
17156 static int check_indirect_jump(struct bpf_verifier_env *env, struct bpf_insn *insn)
17157 {
17158 	struct bpf_verifier_state *other_branch;
17159 	struct bpf_reg_state *dst_reg;
17160 	struct bpf_map *map;
17161 	u32 min_index, max_index;
17162 	int err = 0;
17163 	int n;
17164 	int i;
17165 
17166 	dst_reg = reg_state(env, insn->dst_reg);
17167 	if (dst_reg->type != PTR_TO_INSN) {
17168 		verbose(env, "R%d has type %s, expected PTR_TO_INSN\n",
17169 			     insn->dst_reg, reg_type_str(env, dst_reg->type));
17170 		return -EINVAL;
17171 	}
17172 
17173 	map = dst_reg->map_ptr;
17174 	if (verifier_bug_if(!map, env, "R%d has an empty map pointer", insn->dst_reg))
17175 		return -EFAULT;
17176 
17177 	if (verifier_bug_if(map->map_type != BPF_MAP_TYPE_INSN_ARRAY, env,
17178 			    "R%d has incorrect map type %d", insn->dst_reg, map->map_type))
17179 		return -EFAULT;
17180 
17181 	err = indirect_jump_min_max_index(env, insn->dst_reg, map, &min_index, &max_index);
17182 	if (err)
17183 		return err;
17184 
17185 	/* Ensure that the buffer is large enough */
17186 	if (!env->gotox_tmp_buf || env->gotox_tmp_buf->cnt < max_index - min_index + 1) {
17187 		env->gotox_tmp_buf = bpf_iarray_realloc(env->gotox_tmp_buf,
17188 						        max_index - min_index + 1);
17189 		if (!env->gotox_tmp_buf)
17190 			return -ENOMEM;
17191 	}
17192 
17193 	n = bpf_copy_insn_array_uniq(map, min_index, max_index, env->gotox_tmp_buf->items);
17194 	if (n < 0)
17195 		return n;
17196 	if (n == 0) {
17197 		verbose(env, "register R%d doesn't point to any offset in map id=%d\n",
17198 			     insn->dst_reg, map->id);
17199 		return -EINVAL;
17200 	}
17201 
17202 	for (i = 0; i < n - 1; i++) {
17203 		mark_indirect_target(env, env->gotox_tmp_buf->items[i]);
17204 		other_branch = push_stack(env, env->gotox_tmp_buf->items[i],
17205 					  env->insn_idx, env->cur_state->speculative);
17206 		if (IS_ERR(other_branch))
17207 			return PTR_ERR(other_branch);
17208 	}
17209 	env->insn_idx = env->gotox_tmp_buf->items[n-1];
17210 	mark_indirect_target(env, env->insn_idx);
17211 	return INSN_IDX_UPDATED;
17212 }
17213 
/*
 * Verify the single instruction at env->insn_idx by dispatching on its
 * instruction class.
 *
 * @env:            verifier environment
 * @do_print_state: passed through to process_bpf_exit_full(), which sets it
 *                  when a nested function is left
 *
 * Returns a negative error code on failure. On success it returns 0, or
 * INSN_IDX_UPDATED / PROCESS_BPF_EXIT as produced by the handlers below.
 * NOTE(review): presumably 0 means "continue with the next instruction" -
 * confirm against the caller.
 */
static int do_check_insn(struct bpf_verifier_env *env, bool *do_print_state)
{
	int err;
	struct bpf_insn *insn = &env->prog->insnsi[env->insn_idx];
	u8 class = BPF_CLASS(insn->code);

	switch (class) {
	case BPF_ALU:
	case BPF_ALU64:
		return check_alu_op(env, insn);

	case BPF_LDX:
		return check_load_mem(env, insn, false,
				      BPF_MODE(insn->code) == BPF_MEMSX,
				      true, "ldx");

	case BPF_STX:
		if (BPF_MODE(insn->code) == BPF_ATOMIC)
			return check_atomic(env, insn);
		return check_store_reg(env, insn, false);

	case BPF_ST: {
		/* Handle stack arg write (store immediate) */
		if (is_stack_arg_st(insn)) {
			struct bpf_verifier_state *vstate = env->cur_state;
			struct bpf_func_state *state = vstate->frame[vstate->curframe];

			return check_stack_arg_write(env, state, insn->off, NULL);
		}

		enum bpf_reg_type dst_reg_type;

		/* the destination register is read as the base address of the store */
		err = check_reg_arg(env, insn->dst_reg, SRC_OP);
		if (err)
			return err;

		/* sample the type before check_mem_access() runs */
		dst_reg_type = cur_regs(env)[insn->dst_reg].type;

		err = check_mem_access(env, env->insn_idx, cur_regs(env) + insn->dst_reg, argno_from_reg(insn->dst_reg),
				       insn->off, BPF_SIZE(insn->code),
				       BPF_WRITE, -1, false, false);
		if (err)
			return err;

		return save_aux_ptr_type(env, dst_reg_type, false);
	}
	case BPF_JMP:
	case BPF_JMP32: {
		u8 opcode = BPF_OP(insn->code);

		env->jmps_processed++;
		if (opcode == BPF_CALL) {
			/*
			 * While a lock is held, reject every call with
			 * src_reg == BPF_REG_0 except spin_unlock and
			 * kptr_xchg, and every kfunc call unless off == 0
			 * and kfunc_spin_allowed() approves it. Calls with
			 * src_reg == BPF_PSEUDO_CALL are not rejected by
			 * this check.
			 */
			if (env->cur_state->active_locks) {
				if ((insn->src_reg == BPF_REG_0 &&
				     insn->imm != BPF_FUNC_spin_unlock &&
				     insn->imm != BPF_FUNC_kptr_xchg) ||
				    (insn->src_reg == BPF_PSEUDO_KFUNC_CALL &&
				     (insn->off != 0 || !kfunc_spin_allowed(insn->imm)))) {
					verbose(env,
						"function calls are not allowed while holding a lock\n");
					return -EINVAL;
				}
			}
			mark_reg_scratched(env, BPF_REG_0);
			if (bpf_in_stack_arg_cnt(&env->subprog_info[cur_func(env)->subprogno]))
				cur_func(env)->no_stack_arg_load = true;
			/* src_reg selects the kind of call: subprog, kfunc or helper */
			if (insn->src_reg == BPF_PSEUDO_CALL)
				return check_func_call(env, insn, &env->insn_idx);
			if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL)
				return check_kfunc_call(env, insn, &env->insn_idx);
			return check_helper_call(env, insn, &env->insn_idx);
		} else if (opcode == BPF_JA) {
			/* BPF_X source operand: indirect jump through a register */
			if (BPF_SRC(insn->code) == BPF_X)
				return check_indirect_jump(env, insn);

			/* the jump distance is in 'off' for BPF_JMP, in 'imm' for BPF_JMP32 */
			if (class == BPF_JMP)
				env->insn_idx += insn->off + 1;
			else
				env->insn_idx += insn->imm + 1;
			return INSN_IDX_UPDATED;
		} else if (opcode == BPF_EXIT) {
			return process_bpf_exit_full(env, do_print_state, false);
		}
		return check_cond_jmp_op(env, insn, &env->insn_idx);
	}
	case BPF_LD: {
		u8 mode = BPF_MODE(insn->code);

		if (mode == BPF_ABS || mode == BPF_IND)
			return check_ld_abs(env, insn);

		if (mode == BPF_IMM) {
			err = check_ld_imm(env, insn);
			if (err)
				return err;

			/*
			 * NOTE(review): presumably steps over the second
			 * instruction slot of the load-immediate and marks
			 * it as seen - confirm.
			 */
			env->insn_idx++;
			sanitize_mark_insn_seen(env);
		}
		/* any other BPF_LD mode falls through to here and returns 0 */
		return 0;
	}
	}
	/* all class values are handled above. silence compiler warning */
	return -EFAULT;
}
17319 
/*
 * Main verification loop: process instructions one at a time starting at
 * env->insn_idx, using env->cur_state as the current verifier state.
 *
 * Each iteration validates the instruction index, enforces the
 * BPF_COMPLEXITY_LIMIT_INSNS budget, consults the pruning logic at prune
 * points, optionally logs, and then hands the instruction to
 * do_check_insn(). When a path ends (pruned, exit, or a speculative path
 * that is cut short) the next pending state is taken with pop_stack().
 *
 * Returns 0 once pop_stack() reports -ENOENT, i.e. there is nothing left
 * to explore. Returns a negative error otherwise, including:
 *   -EFAULT  env->insn_idx is out of range, or a verifier_bug_if() fired
 *   -E2BIG   more than BPF_COMPLEXITY_LIMIT_INSNS instructions processed
 *   -EAGAIN  a signal is pending for the current task
 */
static int do_check(struct bpf_verifier_env *env)
{
	/* pop_stack() is asked to pop the log unless BPF_LOG_LEVEL2 is set */
	bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
	struct bpf_verifier_state *state = env->cur_state;
	struct bpf_insn *insns = env->prog->insnsi;
	int insn_cnt = env->prog->len;
	bool do_print_state = false;
	int prev_insn_idx = -1;

	for (;;) {
		struct bpf_insn *insn;
		struct bpf_insn_aux_data *insn_aux;
		int err;

		/* reset current history entry on each new instruction */
		env->cur_hist_ent = NULL;

		env->prev_insn_idx = prev_insn_idx;
		if (env->insn_idx >= insn_cnt) {
			verbose(env, "invalid insn idx %d insn_cnt %d\n",
				env->insn_idx, insn_cnt);
			return -EFAULT;
		}

		insn = &insns[env->insn_idx];
		insn_aux = &env->insn_aux_data[env->insn_idx];

		if (++env->insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
			verbose(env,
				"BPF program is too large. Processed %d insn\n",
				env->insn_processed);
			return -E2BIG;
		}

		state->last_insn_idx = env->prev_insn_idx;
		state->insn_idx = env->insn_idx;

		if (bpf_is_prune_point(env, env->insn_idx)) {
			err = bpf_is_state_visited(env, env->insn_idx);
			if (err < 0)
				return err;
			if (err == 1) {
				/* found equivalent state, can prune the search */
				if (env->log.level & BPF_LOG_LEVEL) {
					if (do_print_state)
						verbose(env, "\nfrom %d to %d%s: safe\n",
							env->prev_insn_idx, env->insn_idx,
							env->cur_state->speculative ?
							" (speculative execution)" : "");
					else
						verbose(env, "%d: safe\n", env->insn_idx);
				}
				goto process_bpf_exit;
			}
		}

		if (bpf_is_jmp_point(env, env->insn_idx)) {
			err = bpf_push_jmp_history(env, state, 0, 0, 0, 0);
			if (err)
				return err;
		}

		/* Let a pending signal abort a long-running verification. */
		if (signal_pending(current))
			return -EAGAIN;

		if (need_resched())
			cond_resched();

		/*
		 * do_print_state is set after a state was popped (see the
		 * bottom of the loop); print the "from X to Y" header once.
		 */
		if (env->log.level & BPF_LOG_LEVEL2 && do_print_state) {
			verbose(env, "\nfrom %d to %d%s:",
				env->prev_insn_idx, env->insn_idx,
				env->cur_state->speculative ?
				" (speculative execution)" : "");
			print_verifier_state(env, state, state->curframe, true);
			do_print_state = false;
		}

		if (env->log.level & BPF_LOG_LEVEL) {
			if (verifier_state_scratched(env))
				print_insn_state(env, state, state->curframe);

			verbose_linfo(env, env->insn_idx, "; ");
			env->prev_log_pos = env->log.end_pos;
			verbose(env, "%d: ", env->insn_idx);
			bpf_verbose_insn(env, insn);
			/* length of the instruction text that was just logged */
			env->prev_insn_print_pos = env->log.end_pos - env->prev_log_pos;
			env->prev_log_pos = env->log.end_pos;
		}

		if (bpf_prog_is_offloaded(env->prog->aux)) {
			err = bpf_prog_offload_verify_insn(env, env->insn_idx,
							   env->prev_insn_idx);
			if (err)
				return err;
		}

		sanitize_mark_insn_seen(env);
		prev_insn_idx = env->insn_idx;

		/* Sanity check: precomputed constants must match verifier state */
		if (!state->speculative && insn_aux->const_reg_mask) {
			struct bpf_reg_state *regs = cur_regs(env);
			u16 mask = insn_aux->const_reg_mask;

			for (int r = 0; r < ARRAY_SIZE(insn_aux->const_reg_vals); r++) {
				u32 cval = insn_aux->const_reg_vals[r];

				/*
				 * Only registers flagged in the mask that currently
				 * hold a constant scalar are compared; the comparison
				 * is on the low 32 bits of the tracked value.
				 */
				if (!(mask & BIT(r)))
					continue;
				if (regs[r].type != SCALAR_VALUE)
					continue;
				if (!tnum_is_const(regs[r].var_off))
					continue;
				if (verifier_bug_if((u32)regs[r].var_off.value != cval,
						    env, "const R%d: %u != %llu",
						    r, cval, regs[r].var_off.value))
					return -EFAULT;
			}
		}

		/* Reduce verification complexity by stopping speculative path
		 * verification when a nospec is encountered.
		 */
		if (state->speculative && insn_aux->nospec)
			goto process_bpf_exit;

		err = do_check_insn(env, &do_print_state);
		if (error_recoverable_with_nospec(err) && state->speculative) {
			/* Prevent this speculative path from ever reaching the
			 * insn that would have been unsafe to execute.
			 */
			insn_aux->nospec = true;
			/* If it was an ADD/SUB insn, potentially remove any
			 * markings for alu sanitization.
			 */
			insn_aux->alu_state = 0;
			goto process_bpf_exit;
		} else if (err < 0) {
			return err;
		} else if (err == PROCESS_BPF_EXIT) {
			goto process_bpf_exit;
		} else if (err == INSN_IDX_UPDATED) {
			/* do_check_insn() already moved env->insn_idx */
		} else if (err == 0) {
			env->insn_idx++;
		}

		if (state->speculative && insn_aux->nospec_result) {
			/* If we are on a path that performed a jump-op, this
			 * may skip a nospec patched-in after the jump. This can
			 * currently never happen because nospec_result is only
			 * used for the write-ops
			 * `*(size*)(dst_reg+off)=src_reg|imm32` and helper
			 * calls. These must never skip the following insn
			 * (i.e., bpf_insn_successors()'s opcode_info.can_jump
			 * is false). Still, add a warning to document this in
			 * case nospec_result is used elsewhere in the future.
			 *
			 * All non-branch instructions have a single
			 * fall-through edge. For these, nospec_result should
			 * already work.
			 */
			if (verifier_bug_if((BPF_CLASS(insn->code) == BPF_JMP ||
					     BPF_CLASS(insn->code) == BPF_JMP32) &&
					    BPF_OP(insn->code) != BPF_CALL, env,
					    "speculation barrier after jump instruction may not have the desired effect"))
				return -EFAULT;
			/*
			 * NOTE: the label below lives inside this if-body. It is
			 * reached by falling through from the check above (the
			 * speculative path ends after an insn with nospec_result)
			 * and by every "goto process_bpf_exit" earlier in the loop.
			 */
process_bpf_exit:
			mark_verifier_state_scratched(env);
			err = bpf_update_branch_counts(env, env->cur_state);
			if (err)
				return err;
			err = pop_stack(env, &prev_insn_idx, &env->insn_idx,
					pop_log);
			if (err < 0) {
				/* -ENOENT: no pending state left, verification is done */
				if (err != -ENOENT)
					return err;
				break;
			} else {
				do_print_state = true;
				continue;
			}
		}
	}

	return 0;
}
17506 
17507 static int find_btf_percpu_datasec(struct btf *btf)
17508 {
17509 	const struct btf_type *t;
17510 	const char *tname;
17511 	int i, n;
17512 
17513 	/*
17514 	 * Both vmlinux and module each have their own ".data..percpu"
17515 	 * DATASECs in BTF. So for module's case, we need to skip vmlinux BTF
17516 	 * types to look at only module's own BTF types.
17517 	 */
17518 	n = btf_nr_types(btf);
17519 	for (i = btf_named_start_id(btf, true); i < n; i++) {
17520 		t = btf_type_by_id(btf, i);
17521 		if (BTF_INFO_KIND(t->info) != BTF_KIND_DATASEC)
17522 			continue;
17523 
17524 		tname = btf_name_by_offset(btf, t->name_off);
17525 		if (!strcmp(tname, ".data..percpu"))
17526 			return i;
17527 	}
17528 
17529 	return -ENOENT;
17530 }
17531 
17532 /*
17533  * Add btf to the env->used_btfs array. If needed, refcount the
17534  * corresponding kernel module. To simplify caller's logic
17535  * in case of error or if btf was added before the function
17536  * decreases the btf refcount.
17537  */
17538 static int __add_used_btf(struct bpf_verifier_env *env, struct btf *btf)
17539 {
17540 	struct btf_mod_pair *btf_mod;
17541 	int ret = 0;
17542 	int i;
17543 
17544 	/* check whether we recorded this BTF (and maybe module) already */
17545 	for (i = 0; i < env->used_btf_cnt; i++)
17546 		if (env->used_btfs[i].btf == btf)
17547 			goto ret_put;
17548 
17549 	if (env->used_btf_cnt >= MAX_USED_BTFS) {
17550 		verbose(env, "The total number of btfs per program has reached the limit of %u\n",
17551 			MAX_USED_BTFS);
17552 		ret = -E2BIG;
17553 		goto ret_put;
17554 	}
17555 
17556 	btf_mod = &env->used_btfs[env->used_btf_cnt];
17557 	btf_mod->btf = btf;
17558 	btf_mod->module = NULL;
17559 
17560 	/* if we reference variables from kernel module, bump its refcount */
17561 	if (btf_is_module(btf)) {
17562 		btf_mod->module = btf_try_get_module(btf);
17563 		if (!btf_mod->module) {
17564 			ret = -ENXIO;
17565 			goto ret_put;
17566 		}
17567 	}
17568 
17569 	env->used_btf_cnt++;
17570 	return 0;
17571 
17572 ret_put:
17573 	/* Either error or this BTF was already added */
17574 	btf_put(btf);
17575 	return ret;
17576 }
17577 
17578 /* replace pseudo btf_id with kernel symbol address */
17579 static int __check_pseudo_btf_id(struct bpf_verifier_env *env,
17580 				 struct bpf_insn *insn,
17581 				 struct bpf_insn_aux_data *aux,
17582 				 struct btf *btf)
17583 {
17584 	const struct btf_var_secinfo *vsi;
17585 	const struct btf_type *datasec;
17586 	const struct btf_type *t;
17587 	const char *sym_name;
17588 	bool percpu = false;
17589 	u32 type, id = insn->imm;
17590 	s32 datasec_id;
17591 	u64 addr;
17592 	int i;
17593 
17594 	t = btf_type_by_id(btf, id);
17595 	if (!t) {
17596 		verbose(env, "ldimm64 insn specifies invalid btf_id %d.\n", id);
17597 		return -ENOENT;
17598 	}
17599 
17600 	if (!btf_type_is_var(t) && !btf_type_is_func(t)) {
17601 		verbose(env, "pseudo btf_id %d in ldimm64 isn't KIND_VAR or KIND_FUNC\n", id);
17602 		return -EINVAL;
17603 	}
17604 
17605 	sym_name = btf_name_by_offset(btf, t->name_off);
17606 	addr = kallsyms_lookup_name(sym_name);
17607 	if (!addr) {
17608 		verbose(env, "ldimm64 failed to find the address for kernel symbol '%s'.\n",
17609 			sym_name);
17610 		return -ENOENT;
17611 	}
17612 	insn[0].imm = (u32)addr;
17613 	insn[1].imm = addr >> 32;
17614 
17615 	if (btf_type_is_func(t)) {
17616 		aux->btf_var.reg_type = PTR_TO_MEM | MEM_RDONLY;
17617 		aux->btf_var.mem_size = 0;
17618 		return 0;
17619 	}
17620 
17621 	datasec_id = find_btf_percpu_datasec(btf);
17622 	if (datasec_id > 0) {
17623 		datasec = btf_type_by_id(btf, datasec_id);
17624 		for_each_vsi(i, datasec, vsi) {
17625 			if (vsi->type == id) {
17626 				percpu = true;
17627 				break;
17628 			}
17629 		}
17630 	}
17631 
17632 	type = t->type;
17633 	t = btf_type_skip_modifiers(btf, type, NULL);
17634 	if (percpu) {
17635 		aux->btf_var.reg_type = PTR_TO_BTF_ID | MEM_PERCPU;
17636 		aux->btf_var.btf = btf;
17637 		aux->btf_var.btf_id = type;
17638 	} else if (!btf_type_is_struct(t)) {
17639 		const struct btf_type *ret;
17640 		const char *tname;
17641 		u32 tsize;
17642 
17643 		/* resolve the type size of ksym. */
17644 		ret = btf_resolve_size(btf, t, &tsize);
17645 		if (IS_ERR(ret)) {
17646 			tname = btf_name_by_offset(btf, t->name_off);
17647 			verbose(env, "ldimm64 unable to resolve the size of type '%s': %ld\n",
17648 				tname, PTR_ERR(ret));
17649 			return -EINVAL;
17650 		}
17651 		aux->btf_var.reg_type = PTR_TO_MEM | MEM_RDONLY;
17652 		aux->btf_var.mem_size = tsize;
17653 	} else {
17654 		aux->btf_var.reg_type = PTR_TO_BTF_ID;
17655 		aux->btf_var.btf = btf;
17656 		aux->btf_var.btf_id = type;
17657 	}
17658 
17659 	return 0;
17660 }
17661 
17662 static int check_pseudo_btf_id(struct bpf_verifier_env *env,
17663 			       struct bpf_insn *insn,
17664 			       struct bpf_insn_aux_data *aux)
17665 {
17666 	struct btf *btf;
17667 	int btf_fd;
17668 	int err;
17669 
17670 	btf_fd = insn[1].imm;
17671 	if (btf_fd) {
17672 		btf = btf_get_by_fd(btf_fd);
17673 		if (IS_ERR(btf)) {
17674 			verbose(env, "invalid module BTF object FD specified.\n");
17675 			return -EINVAL;
17676 		}
17677 	} else {
17678 		if (!btf_vmlinux) {
17679 			verbose(env, "kernel is missing BTF, make sure CONFIG_DEBUG_INFO_BTF=y is specified in Kconfig.\n");
17680 			return -EINVAL;
17681 		}
17682 		btf_get(btf_vmlinux);
17683 		btf = btf_vmlinux;
17684 	}
17685 
17686 	err = __check_pseudo_btf_id(env, insn, aux, btf);
17687 	if (err) {
17688 		btf_put(btf);
17689 		return err;
17690 	}
17691 
17692 	return __add_used_btf(env, btf);
17693 }
17694 
17695 static bool is_tracing_prog_type(enum bpf_prog_type type)
17696 {
17697 	switch (type) {
17698 	case BPF_PROG_TYPE_KPROBE:
17699 	case BPF_PROG_TYPE_TRACEPOINT:
17700 	case BPF_PROG_TYPE_PERF_EVENT:
17701 	case BPF_PROG_TYPE_RAW_TRACEPOINT:
17702 	case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
17703 		return true;
17704 	default:
17705 		return false;
17706 	}
17707 }
17708 
17709 static bool bpf_map_is_cgroup_storage(struct bpf_map *map)
17710 {
17711 	return (map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE ||
17712 		map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
17713 }
17714 
17715 static int check_map_prog_compatibility(struct bpf_verifier_env *env,
17716 					struct bpf_map *map,
17717 					struct bpf_prog *prog)
17718 
17719 {
17720 	enum bpf_prog_type prog_type = resolve_prog_type(prog);
17721 
17722 	if (map->excl_prog_sha &&
17723 	    memcmp(map->excl_prog_sha, prog->digest, SHA256_DIGEST_SIZE)) {
17724 		verbose(env, "program's hash doesn't match map's excl_prog_hash\n");
17725 		return -EACCES;
17726 	}
17727 
17728 	if (btf_record_has_field(map->record, BPF_LIST_HEAD) ||
17729 	    btf_record_has_field(map->record, BPF_RB_ROOT)) {
17730 		if (is_tracing_prog_type(prog_type)) {
17731 			verbose(env, "tracing progs cannot use bpf_{list_head,rb_root} yet\n");
17732 			return -EINVAL;
17733 		}
17734 	}
17735 
17736 	if (btf_record_has_field(map->record, BPF_SPIN_LOCK | BPF_RES_SPIN_LOCK)) {
17737 		if (prog_type == BPF_PROG_TYPE_SOCKET_FILTER) {
17738 			verbose(env, "socket filter progs cannot use bpf_spin_lock yet\n");
17739 			return -EINVAL;
17740 		}
17741 
17742 		if (is_tracing_prog_type(prog_type)) {
17743 			verbose(env, "tracing progs cannot use bpf_spin_lock yet\n");
17744 			return -EINVAL;
17745 		}
17746 	}
17747 
17748 	if ((bpf_prog_is_offloaded(prog->aux) || bpf_map_is_offloaded(map)) &&
17749 	    !bpf_offload_prog_map_match(prog, map)) {
17750 		verbose(env, "offload device mismatch between prog and map\n");
17751 		return -EINVAL;
17752 	}
17753 
17754 	if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
17755 		verbose(env, "bpf_struct_ops map cannot be used in prog\n");
17756 		return -EINVAL;
17757 	}
17758 
17759 	if (prog->sleepable)
17760 		switch (map->map_type) {
17761 		case BPF_MAP_TYPE_HASH:
17762 		case BPF_MAP_TYPE_RHASH:
17763 		case BPF_MAP_TYPE_LRU_HASH:
17764 		case BPF_MAP_TYPE_ARRAY:
17765 		case BPF_MAP_TYPE_PERCPU_HASH:
17766 		case BPF_MAP_TYPE_PERCPU_ARRAY:
17767 		case BPF_MAP_TYPE_LRU_PERCPU_HASH:
17768 		case BPF_MAP_TYPE_LPM_TRIE:
17769 		case BPF_MAP_TYPE_ARRAY_OF_MAPS:
17770 		case BPF_MAP_TYPE_HASH_OF_MAPS:
17771 		case BPF_MAP_TYPE_RINGBUF:
17772 		case BPF_MAP_TYPE_USER_RINGBUF:
17773 		case BPF_MAP_TYPE_INODE_STORAGE:
17774 		case BPF_MAP_TYPE_SK_STORAGE:
17775 		case BPF_MAP_TYPE_TASK_STORAGE:
17776 		case BPF_MAP_TYPE_CGRP_STORAGE:
17777 		case BPF_MAP_TYPE_QUEUE:
17778 		case BPF_MAP_TYPE_STACK:
17779 		case BPF_MAP_TYPE_ARENA:
17780 		case BPF_MAP_TYPE_INSN_ARRAY:
17781 		case BPF_MAP_TYPE_PROG_ARRAY:
17782 			break;
17783 		default:
17784 			verbose(env,
17785 				"Sleepable programs can only use array, hash, ringbuf and local storage maps\n");
17786 			return -EINVAL;
17787 		}
17788 
17789 	if (bpf_map_is_cgroup_storage(map) &&
17790 	    bpf_cgroup_storage_assign(env->prog->aux, map)) {
17791 		verbose(env, "only one cgroup storage of each type is allowed\n");
17792 		return -EBUSY;
17793 	}
17794 
17795 	if (map->map_type == BPF_MAP_TYPE_ARENA) {
17796 		if (env->prog->aux->arena) {
17797 			verbose(env, "Only one arena per program\n");
17798 			return -EBUSY;
17799 		}
17800 		if (!env->allow_ptr_leaks || !env->bpf_capable) {
17801 			verbose(env, "CAP_BPF and CAP_PERFMON are required to use arena\n");
17802 			return -EPERM;
17803 		}
17804 		if (!env->prog->jit_requested) {
17805 			verbose(env, "JIT is required to use arena\n");
17806 			return -EOPNOTSUPP;
17807 		}
17808 		if (!bpf_jit_supports_arena()) {
17809 			verbose(env, "JIT doesn't support arena\n");
17810 			return -EOPNOTSUPP;
17811 		}
17812 		env->prog->aux->arena = (void *)map;
17813 		if (!bpf_arena_get_user_vm_start(env->prog->aux->arena)) {
17814 			verbose(env, "arena's user address must be set via map_extra or mmap()\n");
17815 			return -EINVAL;
17816 		}
17817 	}
17818 
17819 	return 0;
17820 }
17821 
17822 static int __add_used_map(struct bpf_verifier_env *env, struct bpf_map *map)
17823 {
17824 	int i, err;
17825 
17826 	/* check whether we recorded this map already */
17827 	for (i = 0; i < env->used_map_cnt; i++)
17828 		if (env->used_maps[i] == map)
17829 			return i;
17830 
17831 	if (env->used_map_cnt >= MAX_USED_MAPS) {
17832 		verbose(env, "The total number of maps per program has reached the limit of %u\n",
17833 			MAX_USED_MAPS);
17834 		return -E2BIG;
17835 	}
17836 
17837 	err = check_map_prog_compatibility(env, map, env->prog);
17838 	if (err)
17839 		return err;
17840 
17841 	if (env->prog->sleepable)
17842 		atomic64_inc(&map->sleepable_refcnt);
17843 
17844 	/* hold the map. If the program is rejected by verifier,
17845 	 * the map will be released by release_maps() or it
17846 	 * will be used by the valid program until it's unloaded
17847 	 * and all maps are released in bpf_free_used_maps()
17848 	 */
17849 	bpf_map_inc(map);
17850 
17851 	env->used_maps[env->used_map_cnt++] = map;
17852 
17853 	if (map->map_type == BPF_MAP_TYPE_INSN_ARRAY) {
17854 		err = bpf_insn_array_init(map, env->prog);
17855 		if (err) {
17856 			verbose(env, "Failed to properly initialize insn array\n");
17857 			return err;
17858 		}
17859 		env->insn_array_maps[env->insn_array_map_cnt++] = map;
17860 	}
17861 
17862 	return env->used_map_cnt - 1;
17863 }
17864 
17865 /* Add map behind fd to used maps list, if it's not already there, and return
17866  * its index.
17867  * Returns <0 on error, or >= 0 index, on success.
17868  */
17869 static int add_used_map(struct bpf_verifier_env *env, int fd)
17870 {
17871 	struct bpf_map *map;
17872 	CLASS(fd, f)(fd);
17873 
17874 	map = __bpf_map_get(f);
17875 	if (IS_ERR(map)) {
17876 		verbose(env, "fd %d is not pointing to valid bpf_map\n", fd);
17877 		return PTR_ERR(map);
17878 	}
17879 
17880 	return __add_used_map(env, map);
17881 }
17882 
17883 static int check_alu_fields(struct bpf_verifier_env *env, struct bpf_insn *insn)
17884 {
17885 	u8 class = BPF_CLASS(insn->code);
17886 	u8 opcode = BPF_OP(insn->code);
17887 
17888 	switch (opcode) {
17889 	case BPF_NEG:
17890 		if (BPF_SRC(insn->code) != BPF_K || insn->src_reg != BPF_REG_0 ||
17891 		    insn->off != 0 || insn->imm != 0) {
17892 			verbose(env, "BPF_NEG uses reserved fields\n");
17893 			return -EINVAL;
17894 		}
17895 		return 0;
17896 	case BPF_END:
17897 		if (insn->src_reg != BPF_REG_0 || insn->off != 0 ||
17898 		    (insn->imm != 16 && insn->imm != 32 && insn->imm != 64) ||
17899 		    (class == BPF_ALU64 && BPF_SRC(insn->code) != BPF_TO_LE)) {
17900 			verbose(env, "BPF_END uses reserved fields\n");
17901 			return -EINVAL;
17902 		}
17903 		return 0;
17904 	case BPF_MOV:
17905 		if (BPF_SRC(insn->code) == BPF_X) {
17906 			if (class == BPF_ALU) {
17907 				if ((insn->off != 0 && insn->off != 8 && insn->off != 16) ||
17908 				    insn->imm) {
17909 					verbose(env, "BPF_MOV uses reserved fields\n");
17910 					return -EINVAL;
17911 				}
17912 			} else if (insn->off == BPF_ADDR_SPACE_CAST) {
17913 				if (insn->imm != 1 && insn->imm != 1u << 16) {
17914 					verbose(env, "addr_space_cast insn can only convert between address space 1 and 0\n");
17915 					return -EINVAL;
17916 				}
17917 			} else if ((insn->off != 0 && insn->off != 8 &&
17918 				    insn->off != 16 && insn->off != 32) || insn->imm) {
17919 				verbose(env, "BPF_MOV uses reserved fields\n");
17920 				return -EINVAL;
17921 			}
17922 		} else if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
17923 			verbose(env, "BPF_MOV uses reserved fields\n");
17924 			return -EINVAL;
17925 		}
17926 		return 0;
17927 	case BPF_ADD:
17928 	case BPF_SUB:
17929 	case BPF_AND:
17930 	case BPF_OR:
17931 	case BPF_XOR:
17932 	case BPF_LSH:
17933 	case BPF_RSH:
17934 	case BPF_ARSH:
17935 	case BPF_MUL:
17936 	case BPF_DIV:
17937 	case BPF_MOD:
17938 		if (BPF_SRC(insn->code) == BPF_X) {
17939 			if (insn->imm != 0 || (insn->off != 0 && insn->off != 1) ||
17940 			    (insn->off == 1 && opcode != BPF_MOD && opcode != BPF_DIV)) {
17941 				verbose(env, "BPF_ALU uses reserved fields\n");
17942 				return -EINVAL;
17943 			}
17944 		} else if (insn->src_reg != BPF_REG_0 ||
17945 			   (insn->off != 0 && insn->off != 1) ||
17946 			   (insn->off == 1 && opcode != BPF_MOD && opcode != BPF_DIV)) {
17947 			verbose(env, "BPF_ALU uses reserved fields\n");
17948 			return -EINVAL;
17949 		}
17950 		return 0;
17951 	default:
17952 		verbose(env, "invalid BPF_ALU opcode %x\n", opcode);
17953 		return -EINVAL;
17954 	}
17955 }
17956 
17957 static int check_jmp_fields(struct bpf_verifier_env *env, struct bpf_insn *insn)
17958 {
17959 	u8 class = BPF_CLASS(insn->code);
17960 	u8 opcode = BPF_OP(insn->code);
17961 
17962 	switch (opcode) {
17963 	case BPF_CALL:
17964 		if (BPF_SRC(insn->code) != BPF_K ||
17965 		    (insn->src_reg != BPF_PSEUDO_KFUNC_CALL && insn->off != 0) ||
17966 		    (insn->src_reg != BPF_REG_0 && insn->src_reg != BPF_PSEUDO_CALL &&
17967 		     insn->src_reg != BPF_PSEUDO_KFUNC_CALL) ||
17968 		    insn->dst_reg != BPF_REG_0 || class == BPF_JMP32) {
17969 			verbose(env, "BPF_CALL uses reserved fields\n");
17970 			return -EINVAL;
17971 		}
17972 		return 0;
17973 	case BPF_JA:
17974 		if (BPF_SRC(insn->code) == BPF_X) {
17975 			if (insn->src_reg != BPF_REG_0 || insn->imm != 0 || insn->off != 0) {
17976 				verbose(env, "BPF_JA|BPF_X uses reserved fields\n");
17977 				return -EINVAL;
17978 			}
17979 		} else if (insn->src_reg != BPF_REG_0 || insn->dst_reg != BPF_REG_0 ||
17980 			   (class == BPF_JMP && insn->imm != 0) ||
17981 			   (class == BPF_JMP32 && insn->off != 0)) {
17982 			verbose(env, "BPF_JA uses reserved fields\n");
17983 			return -EINVAL;
17984 		}
17985 		return 0;
17986 	case BPF_EXIT:
17987 		if (BPF_SRC(insn->code) != BPF_K || insn->imm != 0 ||
17988 		    insn->src_reg != BPF_REG_0 || insn->dst_reg != BPF_REG_0 ||
17989 		    class == BPF_JMP32) {
17990 			verbose(env, "BPF_EXIT uses reserved fields\n");
17991 			return -EINVAL;
17992 		}
17993 		return 0;
17994 	case BPF_JCOND:
17995 		if (insn->code != (BPF_JMP | BPF_JCOND) || insn->src_reg != BPF_MAY_GOTO ||
17996 		    insn->dst_reg || insn->imm) {
17997 			verbose(env, "invalid may_goto imm %d\n", insn->imm);
17998 			return -EINVAL;
17999 		}
18000 		return 0;
18001 	default:
18002 		if (BPF_SRC(insn->code) == BPF_X) {
18003 			if (insn->imm != 0) {
18004 				verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
18005 				return -EINVAL;
18006 			}
18007 		} else if (insn->src_reg != BPF_REG_0) {
18008 			verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
18009 			return -EINVAL;
18010 		}
18011 		return 0;
18012 	}
18013 }
18014 
18015 static int check_insn_fields(struct bpf_verifier_env *env, struct bpf_insn *insn)
18016 {
18017 	switch (BPF_CLASS(insn->code)) {
18018 	case BPF_ALU:
18019 	case BPF_ALU64:
18020 		return check_alu_fields(env, insn);
18021 	case BPF_LDX:
18022 		if ((BPF_MODE(insn->code) != BPF_MEM && BPF_MODE(insn->code) != BPF_MEMSX) ||
18023 		    insn->imm != 0) {
18024 			verbose(env, "BPF_LDX uses reserved fields\n");
18025 			return -EINVAL;
18026 		}
18027 		return 0;
18028 	case BPF_STX:
18029 		if (BPF_MODE(insn->code) == BPF_ATOMIC)
18030 			return 0;
18031 		if (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0) {
18032 			verbose(env, "BPF_STX uses reserved fields\n");
18033 			return -EINVAL;
18034 		}
18035 		return 0;
18036 	case BPF_ST:
18037 		if (BPF_MODE(insn->code) != BPF_MEM || insn->src_reg != BPF_REG_0) {
18038 			verbose(env, "BPF_ST uses reserved fields\n");
18039 			return -EINVAL;
18040 		}
18041 		return 0;
18042 	case BPF_JMP:
18043 	case BPF_JMP32:
18044 		return check_jmp_fields(env, insn);
18045 	case BPF_LD: {
18046 		u8 mode = BPF_MODE(insn->code);
18047 
18048 		if (mode == BPF_ABS || mode == BPF_IND) {
18049 			if (insn->dst_reg != BPF_REG_0 || insn->off != 0 ||
18050 			    BPF_SIZE(insn->code) == BPF_DW ||
18051 			    (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) {
18052 				verbose(env, "BPF_LD_[ABS|IND] uses reserved fields\n");
18053 				return -EINVAL;
18054 			}
18055 		} else if (mode != BPF_IMM) {
18056 			verbose(env, "invalid BPF_LD mode\n");
18057 			return -EINVAL;
18058 		}
18059 		return 0;
18060 	}
18061 	default:
18062 		verbose(env, "unknown insn class %d\n", BPF_CLASS(insn->code));
18063 		return -EINVAL;
18064 	}
18065 }
18066 
/*
 * Check that insns are sane and rewrite pseudo imm in ld_imm64 instructions:
 *
 * 1. if it accesses map FD, replace it with actual map pointer.
 * 2. if it accesses btf_id of a VAR, replace it with pointer to the var.
 *
 * The program tag is computed first with bpf_prog_calc_tag(). Every
 * instruction then has its register numbers range-checked. A two-slot
 * BPF_LD | BPF_IMM | BPF_DW instruction is resolved according to its
 * src_reg; every other instruction must be a known opcode and pass
 * check_insn_fields().
 *
 * Returns 0 on success or a negative error; the first failing instruction
 * aborts the scan.
 *
 * NOTE: btf_vmlinux is required for converting pseudo btf_id.
 */
static int check_and_resolve_insns(struct bpf_verifier_env *env)
{
	struct bpf_insn *insn = env->prog->insnsi;
	int insn_cnt = env->prog->len;
	int i, err;

	err = bpf_prog_calc_tag(env->prog);
	if (err)
		return err;

	for (i = 0; i < insn_cnt; i++, insn++) {
		/*
		 * Out-of-range register numbers are only tolerated for the
		 * stack-argument store/load encodings.
		 */
		if (insn->dst_reg >= MAX_BPF_REG &&
		    !is_stack_arg_st(insn) && !is_stack_arg_stx(insn)) {
			verbose(env, "R%d is invalid\n", insn->dst_reg);
			return -EINVAL;
		}
		if (insn->src_reg >= MAX_BPF_REG && !is_stack_arg_ldx(insn)) {
			verbose(env, "R%d is invalid\n", insn->src_reg);
			return -EINVAL;
		}
		if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
			struct bpf_insn_aux_data *aux;
			struct bpf_map *map;
			int map_idx;
			u64 addr;
			u32 fd;

			/*
			 * The second slot must exist and carry nothing but
			 * its imm.
			 */
			if (i == insn_cnt - 1 || insn[1].code != 0 ||
			    insn[1].dst_reg != 0 || insn[1].src_reg != 0 ||
			    insn[1].off != 0) {
				verbose(env, "invalid bpf_ld_imm64 insn\n");
				return -EINVAL;
			}

			if (insn[0].off != 0) {
				verbose(env, "BPF_LD_IMM64 uses reserved fields\n");
				return -EINVAL;
			}

			if (insn[0].src_reg == 0)
				/* valid generic load 64-bit imm */
				goto next_insn;

			if (insn[0].src_reg == BPF_PSEUDO_BTF_ID) {
				aux = &env->insn_aux_data[i];
				err = check_pseudo_btf_id(env, insn, aux);
				if (err)
					return err;
				goto next_insn;
			}

			if (insn[0].src_reg == BPF_PSEUDO_FUNC) {
				aux = &env->insn_aux_data[i];
				aux->ptr_type = PTR_TO_FUNC;
				goto next_insn;
			}

			/* In final convert_pseudo_ld_imm64() step, this is
			 * converted into regular 64-bit imm load insn.
			 */
			/*
			 * Only the map pseudo sources remain. The plain map
			 * forms (FD / IDX) require a zero imm in the second
			 * slot; the *_VALUE forms use it as an offset below.
			 */
			switch (insn[0].src_reg) {
			case BPF_PSEUDO_MAP_VALUE:
			case BPF_PSEUDO_MAP_IDX_VALUE:
				break;
			case BPF_PSEUDO_MAP_FD:
			case BPF_PSEUDO_MAP_IDX:
				if (insn[1].imm == 0)
					break;
				fallthrough;
			default:
				verbose(env, "unrecognized bpf_ld_imm64 insn\n");
				return -EINVAL;
			}

			/*
			 * The IDX forms carry an index into env->fd_array,
			 * from which the fd is copied; the other forms carry
			 * the fd directly in imm.
			 */
			switch (insn[0].src_reg) {
			case BPF_PSEUDO_MAP_IDX_VALUE:
			case BPF_PSEUDO_MAP_IDX:
				if (bpfptr_is_null(env->fd_array)) {
					verbose(env, "fd_idx without fd_array is invalid\n");
					return -EPROTO;
				}
				if (copy_from_bpfptr_offset(&fd, env->fd_array,
							    insn[0].imm * sizeof(fd),
							    sizeof(fd)))
					return -EFAULT;
				break;
			default:
				fd = insn[0].imm;
				break;
			}

			map_idx = add_used_map(env, fd);
			if (map_idx < 0)
				return map_idx;
			map = env->used_maps[map_idx];

			aux = &env->insn_aux_data[i];
			aux->map_index = map_idx;

			if (insn[0].src_reg == BPF_PSEUDO_MAP_FD ||
			    insn[0].src_reg == BPF_PSEUDO_MAP_IDX) {
				/* load the map pointer itself */
				addr = (unsigned long)map;
			} else {
				/* load the address of the map value at offset off */
				u32 off = insn[1].imm;

				if (!map->ops->map_direct_value_addr) {
					verbose(env, "no direct value access support for this map type\n");
					return -EINVAL;
				}

				err = map->ops->map_direct_value_addr(map, &addr, off);
				if (err) {
					verbose(env, "invalid access to map value pointer, value_size=%u off=%u\n",
						map->value_size, off);
					return err;
				}

				aux->map_off = off;
				addr += off;
			}

			/* split the 64-bit address across both slots */
			insn[0].imm = (u32)addr;
			insn[1].imm = addr >> 32;

next_insn:
			/* skip the second slot of the ld_imm64 */
			insn++;
			i++;
			continue;
		}

		/* Basic sanity check before we invest more work here. */
		if (!bpf_opcode_in_insntable(insn->code)) {
			verbose(env, "unknown opcode %02x\n", insn->code);
			return -EINVAL;
		}

		err = check_insn_fields(env, insn);
		if (err)
			return err;
	}

	/* now all pseudo BPF_LD_IMM64 instructions load valid
	 * 'struct bpf_map *' into a register instead of user map_fd.
	 * These pointers will be used later by verifier to validate map access.
	 */
	return 0;
}
18222 
18223 /* drop refcnt of maps used by the rejected program */
18224 static void release_maps(struct bpf_verifier_env *env)
18225 {
18226 	__bpf_free_used_maps(env->prog->aux, env->used_maps,
18227 			     env->used_map_cnt);
18228 }
18229 
18230 /* drop refcnt of maps used by the rejected program */
18231 static void release_btfs(struct bpf_verifier_env *env)
18232 {
18233 	__bpf_free_used_btfs(env->used_btfs, env->used_btf_cnt);
18234 }
18235 
18236 /* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */
18237 static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
18238 {
18239 	struct bpf_insn *insn = env->prog->insnsi;
18240 	int insn_cnt = env->prog->len;
18241 	int i;
18242 
18243 	for (i = 0; i < insn_cnt; i++, insn++) {
18244 		if (insn->code != (BPF_LD | BPF_IMM | BPF_DW))
18245 			continue;
18246 		if (insn->src_reg == BPF_PSEUDO_FUNC)
18247 			continue;
18248 		insn->src_reg = 0;
18249 	}
18250 }
18251 
18252 static void release_insn_arrays(struct bpf_verifier_env *env)
18253 {
18254 	int i;
18255 
18256 	for (i = 0; i < env->insn_array_map_cnt; i++)
18257 		bpf_insn_array_release(env->insn_array_maps[i]);
18258 }
18259 
18260 
18261 
18262 /* The verifier does more data flow analysis than llvm and will not
18263  * explore branches that are dead at run time. Malicious programs can
18264  * have dead code too. Therefore replace all dead at-run-time code
18265  * with 'ja -1'.
18266  *
18267  * Just nops are not optimal, e.g. if they would sit at the end of the
18268  * program and through another bug we would manage to jump there, then
18269  * we'd execute beyond program memory otherwise. Returning exception
18270  * code also wouldn't work since we can have subprogs where the dead
18271  * code could be located.
18272  */
18273 static void sanitize_dead_code(struct bpf_verifier_env *env)
18274 {
18275 	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
18276 	struct bpf_insn trap = BPF_JMP_IMM(BPF_JA, 0, 0, -1);
18277 	struct bpf_insn *insn = env->prog->insnsi;
18278 	const int insn_cnt = env->prog->len;
18279 	int i;
18280 
18281 	for (i = 0; i < insn_cnt; i++) {
18282 		if (aux_data[i].seen)
18283 			continue;
18284 		memcpy(insn + i, &trap, sizeof(trap));
18285 		aux_data[i].zext_dst = false;
18286 	}
18287 }
18288 
18289 
18290 
18291 static void free_states(struct bpf_verifier_env *env)
18292 {
18293 	struct bpf_verifier_state_list *sl;
18294 	struct list_head *head, *pos, *tmp;
18295 	struct bpf_scc_info *info;
18296 	int i, j;
18297 
18298 	bpf_free_verifier_state(env->cur_state, true);
18299 	env->cur_state = NULL;
18300 	while (!pop_stack(env, NULL, NULL, false));
18301 
18302 	list_for_each_safe(pos, tmp, &env->free_list) {
18303 		sl = container_of(pos, struct bpf_verifier_state_list, node);
18304 		bpf_free_verifier_state(&sl->state, false);
18305 		kfree(sl);
18306 	}
18307 	INIT_LIST_HEAD(&env->free_list);
18308 
18309 	for (i = 0; i < env->scc_cnt; ++i) {
18310 		info = env->scc_info[i];
18311 		if (!info)
18312 			continue;
18313 		for (j = 0; j < info->num_visits; j++)
18314 			bpf_free_backedges(&info->visits[j]);
18315 		kvfree(info);
18316 		env->scc_info[i] = NULL;
18317 	}
18318 
18319 	if (!env->explored_states)
18320 		return;
18321 
18322 	for (i = 0; i < state_htab_size(env); i++) {
18323 		head = &env->explored_states[i];
18324 
18325 		list_for_each_safe(pos, tmp, head) {
18326 			sl = container_of(pos, struct bpf_verifier_state_list, node);
18327 			bpf_free_verifier_state(&sl->state, false);
18328 			kfree(sl);
18329 		}
18330 		INIT_LIST_HEAD(&env->explored_states[i]);
18331 	}
18332 }
18333 
18334 static int do_check_common(struct bpf_verifier_env *env, int subprog)
18335 {
18336 	bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
18337 	struct bpf_subprog_info *sub = subprog_info(env, subprog);
18338 	struct bpf_prog_aux *aux = env->prog->aux;
18339 	struct bpf_verifier_state *state;
18340 	struct bpf_reg_state *regs;
18341 	int ret, i;
18342 
18343 	env->prev_linfo = NULL;
18344 	env->pass_cnt++;
18345 
18346 	state = kzalloc_obj(struct bpf_verifier_state, GFP_KERNEL_ACCOUNT);
18347 	if (!state)
18348 		return -ENOMEM;
18349 	state->curframe = 0;
18350 	state->speculative = false;
18351 	state->branches = 1;
18352 	state->in_sleepable = env->prog->sleepable;
18353 	state->frame[0] = kzalloc_obj(struct bpf_func_state, GFP_KERNEL_ACCOUNT);
18354 	if (!state->frame[0]) {
18355 		kfree(state);
18356 		return -ENOMEM;
18357 	}
18358 	env->cur_state = state;
18359 	init_func_state(env, state->frame[0],
18360 			BPF_MAIN_FUNC /* callsite */,
18361 			0 /* frameno */,
18362 			subprog);
18363 	state->first_insn_idx = env->subprog_info[subprog].start;
18364 	state->last_insn_idx = -1;
18365 
18366 	regs = state->frame[state->curframe]->regs;
18367 	if (subprog || env->prog->type == BPF_PROG_TYPE_EXT) {
18368 		const char *sub_name = subprog_name(env, subprog);
18369 		struct bpf_subprog_arg_info *arg;
18370 		struct bpf_reg_state *reg;
18371 
18372 		if (env->log.level & BPF_LOG_LEVEL)
18373 			verbose(env, "Validating %s() func#%d...\n", sub_name, subprog);
18374 		ret = btf_prepare_func_args(env, subprog);
18375 		if (ret)
18376 			goto out;
18377 
18378 		if (subprog_is_exc_cb(env, subprog)) {
18379 			state->frame[0]->in_exception_callback_fn = true;
18380 
18381 			/*
18382 			 * Global functions are scalar or void, make sure
18383 			 * we return a scalar.
18384 			 */
18385 			if (subprog_returns_void(env, subprog)) {
18386 				verbose(env, "exception cb cannot return void\n");
18387 				ret = -EINVAL;
18388 				goto out;
18389 			}
18390 
18391 			/* Also ensure the callback only has a single scalar argument. */
18392 			if (sub->arg_cnt != 1 || sub->args[0].arg_type != ARG_ANYTHING) {
18393 				verbose(env, "exception cb only supports single integer argument\n");
18394 				ret = -EINVAL;
18395 				goto out;
18396 			}
18397 		}
18398 		for (i = BPF_REG_1; i <= min_t(u32, sub->arg_cnt, MAX_BPF_FUNC_REG_ARGS); i++) {
18399 			arg = &sub->args[i - BPF_REG_1];
18400 			reg = &regs[i];
18401 
18402 			if (arg->arg_type == ARG_PTR_TO_CTX) {
18403 				reg->type = PTR_TO_CTX;
18404 				mark_reg_known_zero(env, regs, i);
18405 			} else if (arg->arg_type == ARG_ANYTHING) {
18406 				reg->type = SCALAR_VALUE;
18407 				mark_reg_unknown(env, regs, i);
18408 			} else if (arg->arg_type == ARG_PTR_TO_DYNPTR) {
18409 				/* assume unspecial LOCAL dynptr type */
18410 				__mark_dynptr_reg(reg, BPF_DYNPTR_TYPE_LOCAL, true, ++env->id_gen, 0);
18411 			} else if (base_type(arg->arg_type) == ARG_PTR_TO_MEM) {
18412 				reg->type = PTR_TO_MEM;
18413 				reg->type |= arg->arg_type &
18414 					     (PTR_MAYBE_NULL | PTR_UNTRUSTED | MEM_RDONLY);
18415 				mark_reg_known_zero(env, regs, i);
18416 				reg->mem_size = arg->mem_size;
18417 				if (arg->arg_type & PTR_MAYBE_NULL)
18418 					reg->id = ++env->id_gen;
18419 			} else if (base_type(arg->arg_type) == ARG_PTR_TO_BTF_ID) {
18420 				reg->type = PTR_TO_BTF_ID;
18421 				if (arg->arg_type & PTR_MAYBE_NULL)
18422 					reg->type |= PTR_MAYBE_NULL;
18423 				if (arg->arg_type & PTR_UNTRUSTED)
18424 					reg->type |= PTR_UNTRUSTED;
18425 				if (arg->arg_type & PTR_TRUSTED)
18426 					reg->type |= PTR_TRUSTED;
18427 				mark_reg_known_zero(env, regs, i);
18428 				reg->btf = bpf_get_btf_vmlinux(); /* can't fail at this point */
18429 				reg->btf_id = arg->btf_id;
18430 				reg->id = ++env->id_gen;
18431 			} else if (base_type(arg->arg_type) == ARG_PTR_TO_ARENA) {
18432 				/* caller can pass either PTR_TO_ARENA or SCALAR */
18433 				mark_reg_unknown(env, regs, i);
18434 			} else {
18435 				verifier_bug(env, "unhandled arg#%d type %d",
18436 					     i - BPF_REG_1 + 1, arg->arg_type);
18437 				ret = -EFAULT;
18438 				goto out;
18439 			}
18440 		}
18441 		if (env->prog->type == BPF_PROG_TYPE_EXT && sub->arg_cnt > MAX_BPF_FUNC_REG_ARGS) {
18442 			verbose(env, "freplace programs with >%d args not supported yet\n",
18443 				MAX_BPF_FUNC_REG_ARGS);
18444 			ret = -EINVAL;
18445 			goto out;
18446 		}
18447 	} else {
18448 		/* if main BPF program has associated BTF info, validate that
18449 		 * it's matching expected signature, and otherwise mark BTF
18450 		 * info for main program as unreliable
18451 		 */
18452 		if (env->prog->aux->func_info_aux) {
18453 			ret = btf_prepare_func_args(env, 0);
18454 			if (ret || sub->arg_cnt != 1 || sub->args[0].arg_type != ARG_PTR_TO_CTX) {
18455 				env->prog->aux->func_info_aux[0].unreliable = true;
18456 				sub->arg_cnt = 1;
18457 				sub->stack_arg_cnt = 0;
18458 			}
18459 		}
18460 
18461 		/* 1st arg to a function */
18462 		regs[BPF_REG_1].type = PTR_TO_CTX;
18463 		mark_reg_known_zero(env, regs, BPF_REG_1);
18464 	}
18465 
18466 	/* Acquire references for struct_ops program arguments tagged with "__ref" */
18467 	if (!subprog && env->prog->type == BPF_PROG_TYPE_STRUCT_OPS) {
18468 		for (i = 0; i < aux->ctx_arg_info_size; i++) {
18469 			ret = aux->ctx_arg_info[i].refcounted ? acquire_reference(env, 0, 0) : 0;
18470 			if (ret < 0)
18471 				goto out;
18472 
18473 			aux->ctx_arg_info[i].ref_id = ret;
18474 		}
18475 	}
18476 
18477 	ret = do_check(env);
18478 out:
18479 	if (!ret && pop_log)
18480 		bpf_vlog_reset(&env->log, 0);
18481 	free_states(env);
18482 	return ret;
18483 }
18484 
18485 /* Lazily verify all global functions based on their BTF, if they are called
18486  * from main BPF program or any of subprograms transitively.
18487  * BPF global subprogs called from dead code are not validated.
18488  * All callable global functions must pass verification.
18489  * Otherwise the whole program is rejected.
18490  * Consider:
18491  * int bar(int);
18492  * int foo(int f)
18493  * {
18494  *    return bar(f);
18495  * }
18496  * int bar(int b)
18497  * {
18498  *    ...
18499  * }
18500  * foo() will be verified first for R1=any_scalar_value. During verification it
18501  * will be assumed that bar() already verified successfully and call to bar()
18502  * from foo() will be checked for type match only. Later bar() will be verified
18503  * independently to check that it's safe for R1=any_scalar_value.
18504  */
18505 static int do_check_subprogs(struct bpf_verifier_env *env)
18506 {
18507 	struct bpf_prog_aux *aux = env->prog->aux;
18508 	struct bpf_func_info_aux *sub_aux;
18509 	int i, ret, new_cnt;
18510 	u32 insn_processed;
18511 
18512 	if (!aux->func_info)
18513 		return 0;
18514 
18515 	/* exception callback is presumed to be always called */
18516 	if (env->exception_callback_subprog)
18517 		subprog_aux(env, env->exception_callback_subprog)->called = true;
18518 
18519 again:
18520 	new_cnt = 0;
18521 	for (i = 1; i < env->subprog_cnt; i++) {
18522 		if (!bpf_subprog_is_global(env, i))
18523 			continue;
18524 
18525 		insn_processed = env->insn_processed;
18526 
18527 		sub_aux = subprog_aux(env, i);
18528 		if (!sub_aux->called || sub_aux->verified)
18529 			continue;
18530 
18531 		env->insn_idx = env->subprog_info[i].start;
18532 		WARN_ON_ONCE(env->insn_idx == 0);
18533 		ret = do_check_common(env, i);
18534 		env->subprog_info[i].insn_processed = env->insn_processed - insn_processed;
18535 		if (ret) {
18536 			return ret;
18537 		} else if (env->log.level & BPF_LOG_LEVEL) {
18538 			verbose(env, "Func#%d ('%s') is safe for any args that match its prototype\n",
18539 				i, subprog_name(env, i));
18540 		}
18541 
18542 		/* We verified new global subprog, it might have called some
18543 		 * more global subprogs that we haven't verified yet, so we
18544 		 * need to do another pass over subprogs to verify those.
18545 		 */
18546 		sub_aux->verified = true;
18547 		new_cnt++;
18548 	}
18549 
18550 	/* We can't loop forever as we verify at least one global subprog on
18551 	 * each pass.
18552 	 */
18553 	if (new_cnt)
18554 		goto again;
18555 
18556 	return 0;
18557 }
18558 
18559 static int do_check_main(struct bpf_verifier_env *env)
18560 {
18561 	u32 insn_processed = env->insn_processed;
18562 	int ret;
18563 
18564 	env->insn_idx = 0;
18565 	ret = do_check_common(env, 0);
18566 	env->subprog_info[0].insn_processed = env->insn_processed - insn_processed;
18567 	if (!ret)
18568 		env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
18569 	return ret;
18570 }
18571 
18572 
18573 static void print_verification_stats(struct bpf_verifier_env *env)
18574 {
18575 	/* Skip over hidden subprogs which are not verified. */
18576 	int i, subprog_cnt = env->subprog_cnt - env->hidden_subprog_cnt;
18577 
18578 	if (env->log.level & BPF_LOG_STATS) {
18579 		verbose(env, "verification time %lld usec\n",
18580 			div_u64(env->verification_time, 1000));
18581 		verbose(env, "stack depth %d", env->subprog_info[0].stack_depth);
18582 		for (i = 1; i < subprog_cnt; i++)
18583 			verbose(env, "+%d", env->subprog_info[i].stack_depth);
18584 		verbose(env, " max %d\n", env->max_stack_depth);
18585 		verbose(env, "insns processed %d", env->subprog_info[0].insn_processed);
18586 		for (i = 1; i < subprog_cnt; i++)
18587 			if (bpf_subprog_is_global(env, i))
18588 				verbose(env, "+%d", env->subprog_info[i].insn_processed);
18589 		verbose(env, "\n");
18590 	}
18591 	verbose(env, "processed %d insns (limit %d) max_states_per_insn %d "
18592 		"total_states %d peak_states %d mark_read %d\n",
18593 		env->insn_processed, BPF_COMPLEXITY_LIMIT_INSNS,
18594 		env->max_states_per_insn, env->total_states,
18595 		env->peak_states, env->longest_mark_read_walk);
18596 }
18597 
18598 int bpf_prog_ctx_arg_info_init(struct bpf_prog *prog,
18599 			       const struct bpf_ctx_arg_aux *info, u32 cnt)
18600 {
18601 	prog->aux->ctx_arg_info = kmemdup_array(info, cnt, sizeof(*info), GFP_KERNEL_ACCOUNT);
18602 	prog->aux->ctx_arg_info_size = cnt;
18603 
18604 	return prog->aux->ctx_arg_info ? 0 : -ENOMEM;
18605 }
18606 
18607 static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
18608 {
18609 	const struct btf_type *t, *func_proto;
18610 	const struct bpf_struct_ops_desc *st_ops_desc;
18611 	const struct bpf_struct_ops *st_ops;
18612 	const struct btf_member *member;
18613 	struct bpf_prog *prog = env->prog;
18614 	bool has_refcounted_arg = false;
18615 	u32 btf_id, member_idx, member_off;
18616 	struct btf *btf;
18617 	const char *mname;
18618 	int i, err;
18619 
18620 	if (!prog->gpl_compatible) {
18621 		verbose(env, "struct ops programs must have a GPL compatible license\n");
18622 		return -EINVAL;
18623 	}
18624 
18625 	if (!prog->aux->attach_btf_id)
18626 		return -ENOTSUPP;
18627 
18628 	btf = prog->aux->attach_btf;
18629 	if (btf_is_module(btf)) {
18630 		/* Make sure st_ops is valid through the lifetime of env */
18631 		env->attach_btf_mod = btf_try_get_module(btf);
18632 		if (!env->attach_btf_mod) {
18633 			verbose(env, "struct_ops module %s is not found\n",
18634 				btf_get_name(btf));
18635 			return -ENOTSUPP;
18636 		}
18637 	}
18638 
18639 	btf_id = prog->aux->attach_btf_id;
18640 	st_ops_desc = bpf_struct_ops_find(btf, btf_id);
18641 	if (!st_ops_desc) {
18642 		verbose(env, "attach_btf_id %u is not a supported struct\n",
18643 			btf_id);
18644 		return -ENOTSUPP;
18645 	}
18646 	st_ops = st_ops_desc->st_ops;
18647 
18648 	t = st_ops_desc->type;
18649 	member_idx = prog->expected_attach_type;
18650 	if (member_idx >= btf_type_vlen(t)) {
18651 		verbose(env, "attach to invalid member idx %u of struct %s\n",
18652 			member_idx, st_ops->name);
18653 		return -EINVAL;
18654 	}
18655 
18656 	member = &btf_type_member(t)[member_idx];
18657 	mname = btf_name_by_offset(btf, member->name_off);
18658 	func_proto = btf_type_resolve_func_ptr(btf, member->type,
18659 					       NULL);
18660 	if (!func_proto) {
18661 		verbose(env, "attach to invalid member %s(@idx %u) of struct %s\n",
18662 			mname, member_idx, st_ops->name);
18663 		return -EINVAL;
18664 	}
18665 
18666 	member_off = __btf_member_bit_offset(t, member) / 8;
18667 	err = bpf_struct_ops_supported(st_ops, member_off);
18668 	if (err) {
18669 		verbose(env, "attach to unsupported member %s of struct %s\n",
18670 			mname, st_ops->name);
18671 		return err;
18672 	}
18673 
18674 	if (st_ops->check_member) {
18675 		err = st_ops->check_member(t, member, prog);
18676 
18677 		if (err) {
18678 			verbose(env, "attach to unsupported member %s of struct %s\n",
18679 				mname, st_ops->name);
18680 			return err;
18681 		}
18682 	}
18683 
18684 	if (prog->aux->priv_stack_requested && !bpf_jit_supports_private_stack()) {
18685 		verbose(env, "Private stack not supported by jit\n");
18686 		return -EACCES;
18687 	}
18688 
18689 	for (i = 0; i < st_ops_desc->arg_info[member_idx].cnt; i++) {
18690 		if (st_ops_desc->arg_info[member_idx].info[i].refcounted) {
18691 			has_refcounted_arg = true;
18692 			break;
18693 		}
18694 	}
18695 
18696 	/* Tail call is not allowed for programs with refcounted arguments since we
18697 	 * cannot guarantee that valid refcounted kptrs will be passed to the callee.
18698 	 */
18699 	for (i = 0; i < env->subprog_cnt; i++) {
18700 		if (has_refcounted_arg && env->subprog_info[i].has_tail_call) {
18701 			verbose(env, "program with __ref argument cannot tail call\n");
18702 			return -EINVAL;
18703 		}
18704 	}
18705 
18706 	prog->aux->st_ops = st_ops;
18707 	prog->aux->attach_st_ops_member_off = member_off;
18708 
18709 	prog->aux->attach_func_proto = func_proto;
18710 	prog->aux->attach_func_name = mname;
18711 	env->ops = st_ops->verifier_ops;
18712 
18713 	return bpf_prog_ctx_arg_info_init(prog, st_ops_desc->arg_info[member_idx].info,
18714 					  st_ops_desc->arg_info[member_idx].cnt);
18715 }
18716 #define SECURITY_PREFIX "security_"
18717 
18718 #ifdef CONFIG_FUNCTION_ERROR_INJECTION
18719 
18720 /* list of non-sleepable functions that are otherwise on
18721  * ALLOW_ERROR_INJECTION list
18722  */
18723 BTF_SET_START(btf_non_sleepable_error_inject)
18724 /* Three functions below can be called from sleepable and non-sleepable context.
18725  * Assume non-sleepable from bpf safety point of view.
18726  */
18727 BTF_ID(func, __filemap_add_folio)
18728 #ifdef CONFIG_FAIL_PAGE_ALLOC
18729 BTF_ID(func, should_fail_alloc_page)
18730 #endif
18731 #ifdef CONFIG_FAILSLAB
18732 BTF_ID(func, should_failslab)
18733 #endif
18734 BTF_SET_END(btf_non_sleepable_error_inject)
18735 
18736 static int check_non_sleepable_error_inject(u32 btf_id)
18737 {
18738 	return btf_id_set_contains(&btf_non_sleepable_error_inject, btf_id);
18739 }
18740 
18741 static int check_attach_sleepable(u32 btf_id, unsigned long addr, const char *func_name)
18742 {
18743 	/* fentry/fexit/fmod_ret progs can be sleepable if they are
18744 	 * attached to ALLOW_ERROR_INJECTION and are not in denylist.
18745 	 */
18746 	if (!check_non_sleepable_error_inject(btf_id) &&
18747 	    within_error_injection_list(addr))
18748 		return 0;
18749 
18750 	return -EINVAL;
18751 }
18752 
18753 static int check_attach_modify_return(unsigned long addr, const char *func_name)
18754 {
18755 	if (within_error_injection_list(addr) ||
18756 	    !strncmp(SECURITY_PREFIX, func_name, sizeof(SECURITY_PREFIX) - 1))
18757 		return 0;
18758 
18759 	return -EINVAL;
18760 }
18761 
18762 #else
18763 
/* The arch-specific syscall wrapper prefixes are hard-coded in the arch
 * syscall code, so they have to be hard-coded here as well. Ftrace offers
 * arch_syscall_match_sym_name(), but that only compares two concrete
 * function names.
 *
 * Returns true when @func_name starts with the prefix of the architecture
 * this file is built for, false otherwise (always false on architectures
 * without a known prefix).
 */
static bool has_arch_syscall_prefix(const char *func_name)
{
	const char *pfx;

#if defined(__x86_64__)
	pfx = "__x64_";
#elif defined(__i386__)
	pfx = "__ia32_";
#elif defined(__s390x__)
	pfx = "__s390x_";
#elif defined(__aarch64__)
	pfx = "__arm64_";
#elif defined(__riscv)
	pfx = "__riscv_";
#elif defined(__powerpc__) || defined(__powerpc64__)
	pfx = "sys_";
#elif defined(__loongarch__)
	pfx = "sys_";
#else
	pfx = NULL;
#endif

	if (!pfx)
		return false;

	return strncmp(func_name, pfx, strlen(pfx)) == 0;
}
18788 
18789 /* Without error injection, allow sleepable and fmod_ret progs on syscalls. */
18790 
18791 static int check_attach_sleepable(u32 btf_id, unsigned long addr, const char *func_name)
18792 {
18793 	if (has_arch_syscall_prefix(func_name))
18794 		return 0;
18795 
18796 	return -EINVAL;
18797 }
18798 
18799 static int check_attach_modify_return(unsigned long addr, const char *func_name)
18800 {
18801 	if (has_arch_syscall_prefix(func_name) ||
18802 	    !strncmp(SECURITY_PREFIX, func_name, sizeof(SECURITY_PREFIX) - 1))
18803 		return 0;
18804 
18805 	return -EINVAL;
18806 }
18807 
18808 #endif /* CONFIG_FUNCTION_ERROR_INJECTION */
18809 
18810 static bool is_tracing_multi_id(const struct bpf_prog *prog, u32 btf_id)
18811 {
18812 	return is_tracing_multi(prog->expected_attach_type) && bpf_multi_func_btf_id[0] == btf_id;
18813 }
18814 
18815 static int btf_id_allow_sleepable(u32 btf_id, unsigned long addr, const struct bpf_prog *prog,
18816 				  const struct btf *btf)
18817 {
18818 	const struct btf_type *t;
18819 	const char *tname;
18820 
18821 	switch (prog->type) {
18822 	case BPF_PROG_TYPE_TRACING:
18823 		t = btf_type_by_id(btf, btf_id);
18824 		if (!t)
18825 			return -EINVAL;
18826 		tname = btf_name_by_offset(btf, t->name_off);
18827 		if (!tname)
18828 			return -EINVAL;
18829 
18830 		/*
18831 		 * *.multi sleepable programs will pass initial sleepable check,
18832 		 * the actual attached btf ids are checked later during the link
18833 		 * attachment.
18834 		 */
18835 		if (is_tracing_multi_id(prog, btf_id))
18836 			return 0;
18837 		if (!check_attach_sleepable(btf_id, addr, tname))
18838 			return 0;
18839 		/*
18840 		 * fentry/fexit/fmod_ret progs can also be sleepable if they are
18841 		 * in the fmodret id set with the KF_SLEEPABLE flag.
18842 		 */
18843 		else {
18844 			u32 *flags = btf_kfunc_is_modify_return(btf, btf_id, prog);
18845 
18846 			if (flags && (*flags & KF_SLEEPABLE))
18847 				return 0;
18848 		}
18849 		break;
18850 	case BPF_PROG_TYPE_LSM:
18851 		/*
18852 		 * LSM progs check that they are attached to bpf_lsm_*() funcs.
18853 		 * Only some of them are sleepable.
18854 		 */
18855 		if (bpf_lsm_is_sleepable_hook(btf_id))
18856 			return 0;
18857 		break;
18858 	default:
18859 		break;
18860 	}
18861 	return -EINVAL;
18862 }
18863 
18864 int bpf_check_attach_target(struct bpf_verifier_log *log,
18865 			    const struct bpf_prog *prog,
18866 			    const struct bpf_prog *tgt_prog,
18867 			    u32 btf_id,
18868 			    struct bpf_attach_target_info *tgt_info)
18869 {
18870 	bool prog_extension = prog->type == BPF_PROG_TYPE_EXT;
18871 	bool prog_tracing = prog->type == BPF_PROG_TYPE_TRACING;
18872 	char trace_symbol[KSYM_SYMBOL_LEN];
18873 	const char prefix[] = "btf_trace_";
18874 	struct bpf_raw_event_map *btp;
18875 	int ret = 0, subprog = -1, i;
18876 	const struct btf_type *t;
18877 	bool conservative = true;
18878 	const char *tname, *fname;
18879 	struct btf *btf;
18880 	long addr = 0;
18881 	struct module *mod = NULL;
18882 
18883 	if (!btf_id) {
18884 		bpf_log(log, "Tracing programs must provide btf_id\n");
18885 		return -EINVAL;
18886 	}
18887 	btf = tgt_prog ? tgt_prog->aux->btf : prog->aux->attach_btf;
18888 	if (!btf) {
18889 		bpf_log(log,
18890 			"Tracing program can only be attached to another program annotated with BTF\n");
18891 		return -EINVAL;
18892 	}
18893 	t = btf_type_by_id(btf, btf_id);
18894 	if (!t) {
18895 		bpf_log(log, "attach_btf_id %u is invalid\n", btf_id);
18896 		return -EINVAL;
18897 	}
18898 	tname = btf_name_by_offset(btf, t->name_off);
18899 	if (!tname) {
18900 		bpf_log(log, "attach_btf_id %u doesn't have a name\n", btf_id);
18901 		return -EINVAL;
18902 	}
18903 	if (tgt_prog) {
18904 		struct bpf_prog_aux *aux = tgt_prog->aux;
18905 		bool tgt_changes_pkt_data;
18906 		bool tgt_might_sleep;
18907 
18908 		if (bpf_prog_is_dev_bound(prog->aux) &&
18909 		    !bpf_prog_dev_bound_match(prog, tgt_prog)) {
18910 			bpf_log(log, "Target program bound device mismatch");
18911 			return -EINVAL;
18912 		}
18913 
18914 		for (i = 0; i < aux->func_info_cnt; i++)
18915 			if (aux->func_info[i].type_id == btf_id) {
18916 				subprog = i;
18917 				break;
18918 			}
18919 		if (subprog == -1) {
18920 			bpf_log(log, "Subprog %s doesn't exist\n", tname);
18921 			return -EINVAL;
18922 		}
18923 		if (aux->func && aux->func[subprog]->aux->exception_cb) {
18924 			bpf_log(log,
18925 				"%s programs cannot attach to exception callback\n",
18926 				prog_extension ? "Extension" : "Tracing");
18927 			return -EINVAL;
18928 		}
18929 		conservative = aux->func_info_aux[subprog].unreliable;
18930 		if (prog_extension) {
18931 			if (conservative) {
18932 				bpf_log(log,
18933 					"Cannot replace static functions\n");
18934 				return -EINVAL;
18935 			}
18936 			if (!prog->jit_requested) {
18937 				bpf_log(log,
18938 					"Extension programs should be JITed\n");
18939 				return -EINVAL;
18940 			}
18941 			tgt_changes_pkt_data = aux->func
18942 					       ? aux->func[subprog]->aux->changes_pkt_data
18943 					       : aux->changes_pkt_data;
18944 			if (prog->aux->changes_pkt_data && !tgt_changes_pkt_data) {
18945 				bpf_log(log,
18946 					"Extension program changes packet data, while original does not\n");
18947 				return -EINVAL;
18948 			}
18949 
18950 			tgt_might_sleep = aux->func
18951 					  ? aux->func[subprog]->aux->might_sleep
18952 					  : aux->might_sleep;
18953 			if (prog->aux->might_sleep && !tgt_might_sleep) {
18954 				bpf_log(log,
18955 					"Extension program may sleep, while original does not\n");
18956 				return -EINVAL;
18957 			}
18958 		}
18959 		if (!tgt_prog->jited) {
18960 			bpf_log(log, "Can attach to only JITed progs\n");
18961 			return -EINVAL;
18962 		}
18963 		if (prog_tracing) {
18964 			if (aux->attach_tracing_prog) {
18965 				/*
18966 				 * Target program is an fentry/fexit which is already attached
18967 				 * to another tracing program. More levels of nesting
18968 				 * attachment are not allowed.
18969 				 */
18970 				bpf_log(log, "Cannot nest tracing program attach more than once\n");
18971 				return -EINVAL;
18972 			}
18973 		} else if (tgt_prog->type == prog->type) {
18974 			/*
18975 			 * To avoid potential call chain cycles, prevent attaching of a
18976 			 * program extension to another extension. It's ok to attach
18977 			 * fentry/fexit to extension program.
18978 			 */
18979 			bpf_log(log, "Cannot recursively attach\n");
18980 			return -EINVAL;
18981 		}
18982 		if (tgt_prog->type == BPF_PROG_TYPE_TRACING &&
18983 		    prog_extension &&
18984 		    (tgt_prog->expected_attach_type == BPF_TRACE_FENTRY ||
18985 		     tgt_prog->expected_attach_type == BPF_TRACE_FEXIT ||
18986 		     tgt_prog->expected_attach_type == BPF_TRACE_FENTRY_MULTI ||
18987 		     tgt_prog->expected_attach_type == BPF_TRACE_FEXIT_MULTI ||
18988 		     tgt_prog->expected_attach_type == BPF_TRACE_FSESSION ||
18989 		     tgt_prog->expected_attach_type == BPF_TRACE_FSESSION_MULTI)) {
18990 			/* Program extensions can extend all program types
18991 			 * except fentry/fexit. The reason is the following.
18992 			 * The fentry/fexit programs are used for performance
18993 			 * analysis, stats and can be attached to any program
18994 			 * type. When extension program is replacing XDP function
18995 			 * it is necessary to allow performance analysis of all
18996 			 * functions. Both original XDP program and its program
18997 			 * extension. Hence attaching fentry/fexit to
18998 			 * BPF_PROG_TYPE_EXT is allowed. If extending of
18999 			 * fentry/fexit was allowed it would be possible to create
19000 			 * long call chain fentry->extension->fentry->extension
19001 			 * beyond reasonable stack size. Hence extending fentry
19002 			 * is not allowed.
19003 			 */
19004 			bpf_log(log, "Cannot extend fentry/fexit/fsession\n");
19005 			return -EINVAL;
19006 		}
19007 	} else {
19008 		if (prog_extension) {
19009 			bpf_log(log, "Cannot replace kernel functions\n");
19010 			return -EINVAL;
19011 		}
19012 	}
19013 
19014 	switch (prog->expected_attach_type) {
19015 	case BPF_TRACE_RAW_TP:
19016 		if (tgt_prog) {
19017 			bpf_log(log,
19018 				"Only FENTRY/FEXIT/FSESSION progs are attachable to another BPF prog\n");
19019 			return -EINVAL;
19020 		}
19021 		if (!btf_type_is_typedef(t)) {
19022 			bpf_log(log, "attach_btf_id %u is not a typedef\n",
19023 				btf_id);
19024 			return -EINVAL;
19025 		}
19026 		if (strncmp(prefix, tname, sizeof(prefix) - 1)) {
19027 			bpf_log(log, "attach_btf_id %u points to wrong type name %s\n",
19028 				btf_id, tname);
19029 			return -EINVAL;
19030 		}
19031 		tname += sizeof(prefix) - 1;
19032 
19033 		/* The func_proto of "btf_trace_##tname" is generated from typedef without argument
19034 		 * names. Thus using bpf_raw_event_map to get argument names.
19035 		 */
19036 		btp = bpf_get_raw_tracepoint(tname);
19037 		if (!btp)
19038 			return -EINVAL;
19039 		if (prog->sleepable && !tracepoint_is_faultable(btp->tp)) {
19040 			bpf_log(log, "Sleepable program cannot attach to non-faultable tracepoint %s\n",
19041 				tname);
19042 			bpf_put_raw_tracepoint(btp);
19043 			return -EINVAL;
19044 		}
19045 		fname = kallsyms_lookup((unsigned long)btp->bpf_func, NULL, NULL, NULL,
19046 					trace_symbol);
19047 		bpf_put_raw_tracepoint(btp);
19048 
19049 		if (fname)
19050 			ret = btf_find_by_name_kind(btf, fname, BTF_KIND_FUNC);
19051 
19052 		if (!fname || ret < 0) {
19053 			bpf_log(log, "Cannot find btf of tracepoint template, fall back to %s%s.\n",
19054 				prefix, tname);
19055 			t = btf_type_by_id(btf, t->type);
19056 			if (!btf_type_is_ptr(t))
19057 				/* should never happen in valid vmlinux build */
19058 				return -EINVAL;
19059 		} else {
19060 			t = btf_type_by_id(btf, ret);
19061 			if (!btf_type_is_func(t))
19062 				/* should never happen in valid vmlinux build */
19063 				return -EINVAL;
19064 		}
19065 
19066 		t = btf_type_by_id(btf, t->type);
19067 		if (!btf_type_is_func_proto(t))
19068 			/* should never happen in valid vmlinux build */
19069 			return -EINVAL;
19070 
19071 		break;
19072 	case BPF_TRACE_ITER:
19073 		if (!btf_type_is_func(t)) {
19074 			bpf_log(log, "attach_btf_id %u is not a function\n",
19075 				btf_id);
19076 			return -EINVAL;
19077 		}
19078 		t = btf_type_by_id(btf, t->type);
19079 		if (!btf_type_is_func_proto(t))
19080 			return -EINVAL;
19081 		ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel);
19082 		if (ret)
19083 			return ret;
19084 		break;
19085 	default:
19086 		if (!prog_extension)
19087 			return -EINVAL;
19088 		fallthrough;
19089 	case BPF_MODIFY_RETURN:
19090 	case BPF_LSM_MAC:
19091 	case BPF_LSM_CGROUP:
19092 	case BPF_TRACE_FENTRY:
19093 	case BPF_TRACE_FEXIT:
19094 	case BPF_TRACE_FSESSION:
19095 	case BPF_TRACE_FSESSION_MULTI:
19096 	case BPF_TRACE_FENTRY_MULTI:
19097 	case BPF_TRACE_FEXIT_MULTI:
19098 		if ((prog->expected_attach_type == BPF_TRACE_FSESSION ||
19099 		    prog->expected_attach_type == BPF_TRACE_FSESSION_MULTI) &&
19100 		    !bpf_jit_supports_fsession()) {
19101 			bpf_log(log, "JIT does not support fsession\n");
19102 			return -EOPNOTSUPP;
19103 		}
19104 		if (!btf_type_is_func(t)) {
19105 			bpf_log(log, "attach_btf_id %u is not a function\n",
19106 				btf_id);
19107 			return -EINVAL;
19108 		}
19109 		if (prog_extension &&
19110 		    btf_check_type_match(log, prog, btf, t))
19111 			return -EINVAL;
19112 		t = btf_type_by_id(btf, t->type);
19113 		if (!btf_type_is_func_proto(t))
19114 			return -EINVAL;
19115 
19116 		if ((prog->aux->saved_dst_prog_type || prog->aux->saved_dst_attach_type) &&
19117 		    (!tgt_prog || prog->aux->saved_dst_prog_type != tgt_prog->type ||
19118 		     prog->aux->saved_dst_attach_type != tgt_prog->expected_attach_type))
19119 			return -EINVAL;
19120 
19121 		if (tgt_prog && conservative)
19122 			t = NULL;
19123 
19124 		ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel);
19125 		if (ret < 0)
19126 			return ret;
19127 
19128 		/*
19129 		 * *.multi programs don't need an address during program
19130 		 * verification, we just take the module ref if needed.
19131 		 */
19132 		if (is_tracing_multi_id(prog, btf_id)) {
19133 			if (btf_is_module(btf)) {
19134 				mod = btf_try_get_module(btf);
19135 				if (!mod)
19136 					return -ENOENT;
19137 			}
19138 			addr = 0;
19139 		} else if (tgt_prog) {
19140 			if (subprog == 0)
19141 				addr = (long) tgt_prog->bpf_func;
19142 			else
19143 				addr = (long) tgt_prog->aux->func[subprog]->bpf_func;
19144 		} else {
19145 			if (btf_is_module(btf)) {
19146 				mod = btf_try_get_module(btf);
19147 				if (mod)
19148 					addr = find_kallsyms_symbol_value(mod, tname);
19149 				else
19150 					addr = 0;
19151 			} else {
19152 				addr = kallsyms_lookup_name(tname);
19153 			}
19154 			if (!addr) {
19155 				module_put(mod);
19156 				bpf_log(log,
19157 					"The address of function %s cannot be found\n",
19158 					tname);
19159 				return -ENOENT;
19160 			}
19161 		}
19162 
19163 		if (prog->sleepable) {
19164 			ret = btf_id_allow_sleepable(btf_id, addr, prog, btf);
19165 			if (ret) {
19166 				module_put(mod);
19167 				bpf_log(log, "%s is not sleepable\n", tname);
19168 				return ret;
19169 			}
19170 		} else if (prog->expected_attach_type == BPF_MODIFY_RETURN) {
19171 			if (tgt_prog) {
19172 				module_put(mod);
19173 				bpf_log(log, "can't modify return codes of BPF programs\n");
19174 				return -EINVAL;
19175 			}
19176 			ret = -EINVAL;
19177 			if (btf_kfunc_is_modify_return(btf, btf_id, prog) ||
19178 			    !check_attach_modify_return(addr, tname))
19179 				ret = 0;
19180 			if (ret) {
19181 				module_put(mod);
19182 				bpf_log(log, "%s() is not modifiable\n", tname);
19183 				return ret;
19184 			}
19185 		}
19186 
19187 		break;
19188 	}
19189 	tgt_info->tgt_addr = addr;
19190 	tgt_info->tgt_name = tname;
19191 	tgt_info->tgt_type = t;
19192 	tgt_info->tgt_mod = mod;
19193 	return 0;
19194 }
19195 
/*
 * BTF IDs of kernel functions that tracing programs must not attach to.
 * check_attach_btf_id() rejects BPF_PROG_TYPE_TRACING programs whose
 * attach_btf_id is in this set, and bpf_check_attach_btf_id_multi() rejects
 * such targets for multi attachments. Which entries exist depends on the
 * kernel configuration, hence the #ifdef groups below.
 */
BTF_SET_START(btf_id_deny)
BTF_ID_UNUSED
#ifdef CONFIG_SMP
BTF_ID(func, ___migrate_enable)
BTF_ID(func, migrate_disable)
BTF_ID(func, migrate_enable)
#endif
#if !defined CONFIG_PREEMPT_RCU && !defined CONFIG_TINY_RCU
BTF_ID(func, rcu_read_unlock_strict)
#endif
#if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_TRACE_PREEMPT_TOGGLE)
BTF_ID(func, preempt_count_add)
BTF_ID(func, preempt_count_sub)
#endif
#ifdef CONFIG_PREEMPT_RCU
BTF_ID(func, __rcu_read_lock)
BTF_ID(func, __rcu_read_unlock)
#endif
BTF_SET_END(btf_id_deny)
19215 
/* fexit, fsession and fmod_ret programs (including the fexit/fsession *_MULTI
 * variants, which are checked in bpf_check_attach_btf_id_multi()) can't be
 * used to attach to __noreturn functions.
 * Currently, we must manually list all __noreturn functions here. Once a more
 * robust solution is implemented, this workaround can be removed.
 */
BTF_SET_START(noreturn_deny)
#ifdef CONFIG_IA32_EMULATION
BTF_ID(func, __ia32_sys_exit)
BTF_ID(func, __ia32_sys_exit_group)
#endif
#ifdef CONFIG_KUNIT
BTF_ID(func, __kunit_abort)
BTF_ID(func, kunit_try_catch_throw)
#endif
#ifdef CONFIG_MODULES
BTF_ID(func, __module_put_and_kthread_exit)
#endif
#ifdef CONFIG_X86_64
BTF_ID(func, __x64_sys_exit)
BTF_ID(func, __x64_sys_exit_group)
#endif
BTF_ID(func, do_exit)
BTF_ID(func, do_group_exit)
BTF_ID(func, kthread_complete_and_exit)
BTF_ID(func, make_task_dead)
BTF_SET_END(noreturn_deny)
19241 
19242 static bool can_be_sleepable(struct bpf_prog *prog)
19243 {
19244 	if (prog->type == BPF_PROG_TYPE_TRACING) {
19245 		switch (prog->expected_attach_type) {
19246 		case BPF_TRACE_FENTRY:
19247 		case BPF_TRACE_FEXIT:
19248 		case BPF_MODIFY_RETURN:
19249 		case BPF_TRACE_ITER:
19250 		case BPF_TRACE_FSESSION:
19251 		case BPF_TRACE_RAW_TP:
19252 		case BPF_TRACE_FENTRY_MULTI:
19253 		case BPF_TRACE_FEXIT_MULTI:
19254 		case BPF_TRACE_FSESSION_MULTI:
19255 			return true;
19256 		default:
19257 			return false;
19258 		}
19259 	}
19260 	if (prog->type == BPF_PROG_TYPE_LSM)
19261 		return prog->expected_attach_type != BPF_LSM_CGROUP;
19262 
19263 	return prog->type == BPF_PROG_TYPE_KPROBE /* only for uprobes */ ||
19264 	       prog->type == BPF_PROG_TYPE_STRUCT_OPS ||
19265 	       prog->type == BPF_PROG_TYPE_RAW_TRACEPOINT ||
19266 	       prog->type == BPF_PROG_TYPE_TRACEPOINT;
19267 }
19268 
19269 static int check_attach_btf_id(struct bpf_verifier_env *env)
19270 {
19271 	struct bpf_prog *prog = env->prog;
19272 	struct bpf_prog *tgt_prog = prog->aux->dst_prog;
19273 	struct bpf_attach_target_info tgt_info = {};
19274 	u32 btf_id = prog->aux->attach_btf_id;
19275 	struct bpf_trampoline *tr;
19276 	int ret;
19277 	u64 key;
19278 
19279 	if (prog->type == BPF_PROG_TYPE_SYSCALL) {
19280 		if (prog->sleepable)
19281 			/* attach_btf_id checked to be zero already */
19282 			return 0;
19283 		verbose(env, "Syscall programs can only be sleepable\n");
19284 		return -EINVAL;
19285 	}
19286 
19287 	if (prog->sleepable && !can_be_sleepable(prog)) {
19288 		verbose(env, "Program of this type cannot be sleepable\n");
19289 		return -EINVAL;
19290 	}
19291 
19292 	if (prog->type == BPF_PROG_TYPE_STRUCT_OPS)
19293 		return check_struct_ops_btf_id(env);
19294 
19295 	if (prog->type != BPF_PROG_TYPE_TRACING &&
19296 	    prog->type != BPF_PROG_TYPE_LSM &&
19297 	    prog->type != BPF_PROG_TYPE_EXT)
19298 		return 0;
19299 
19300 	ret = bpf_check_attach_target(&env->log, prog, tgt_prog, btf_id, &tgt_info);
19301 	if (ret)
19302 		return ret;
19303 
19304 	if (tgt_prog && prog->type == BPF_PROG_TYPE_EXT) {
19305 		/* to make freplace equivalent to their targets, they need to
19306 		 * inherit env->ops and expected_attach_type for the rest of the
19307 		 * verification
19308 		 */
19309 		env->ops = bpf_verifier_ops[tgt_prog->type];
19310 		prog->expected_attach_type = tgt_prog->expected_attach_type;
19311 	}
19312 
19313 	/* store info about the attachment target that will be used later */
19314 	prog->aux->attach_func_proto = tgt_info.tgt_type;
19315 	prog->aux->attach_func_name = tgt_info.tgt_name;
19316 	prog->aux->mod = tgt_info.tgt_mod;
19317 
19318 	if (tgt_prog) {
19319 		prog->aux->saved_dst_prog_type = tgt_prog->type;
19320 		prog->aux->saved_dst_attach_type = tgt_prog->expected_attach_type;
19321 	}
19322 
19323 	if (prog->expected_attach_type == BPF_TRACE_RAW_TP) {
19324 		prog->aux->attach_btf_trace = true;
19325 		return 0;
19326 	} else if (prog->expected_attach_type == BPF_TRACE_ITER) {
19327 		return bpf_iter_prog_supported(prog);
19328 	}
19329 
19330 	if (prog->type == BPF_PROG_TYPE_LSM) {
19331 		ret = bpf_lsm_verify_prog(&env->log, prog);
19332 		if (ret < 0)
19333 			return ret;
19334 	} else if (prog->type == BPF_PROG_TYPE_TRACING &&
19335 		   btf_id_set_contains(&btf_id_deny, btf_id)) {
19336 		verbose(env, "Attaching tracing programs to function '%s' is rejected.\n",
19337 			tgt_info.tgt_name);
19338 		return -EINVAL;
19339 	} else if ((prog->expected_attach_type == BPF_TRACE_FEXIT ||
19340 		   prog->expected_attach_type == BPF_TRACE_FSESSION ||
19341 		   prog->expected_attach_type == BPF_TRACE_FSESSION_MULTI ||
19342 		   prog->expected_attach_type == BPF_MODIFY_RETURN) &&
19343 		   btf_id_set_contains(&noreturn_deny, btf_id)) {
19344 		verbose(env, "Attaching fexit/fsession/fmod_ret to __noreturn function '%s' is rejected.\n",
19345 			tgt_info.tgt_name);
19346 		return -EINVAL;
19347 	}
19348 
19349 	/*
19350 	 * We don't get trampoline for tracing_multi programs at this point,
19351 	 * it's done when tracing_multi link is created.
19352 	 */
19353 	if (prog->type == BPF_PROG_TYPE_TRACING &&
19354 	    is_tracing_multi(prog->expected_attach_type))
19355 		return 0;
19356 
19357 	key = bpf_trampoline_compute_key(tgt_prog, prog->aux->attach_btf, btf_id);
19358 	tr = bpf_trampoline_get(key, &tgt_info);
19359 	if (!tr)
19360 		return -ENOMEM;
19361 
19362 	if (tgt_prog && tgt_prog->aux->tail_call_reachable)
19363 		tr->flags = BPF_TRAMP_F_TAIL_CALL_CTX;
19364 
19365 	prog->aux->dst_trampoline = tr;
19366 	return 0;
19367 }
19368 
19369 int bpf_check_attach_btf_id_multi(struct btf *btf, struct bpf_prog *prog, u32 btf_id,
19370 				  struct bpf_attach_target_info *tgt_info)
19371 {
19372 	const struct btf_type *t;
19373 	unsigned long addr;
19374 	const char *tname;
19375 	int err;
19376 
19377 	if (!btf_id || !btf)
19378 		return -EINVAL;
19379 
19380 	/* Check noreturn attachment. */
19381 	if ((prog->expected_attach_type == BPF_TRACE_FEXIT_MULTI ||
19382 	     prog->expected_attach_type == BPF_TRACE_FSESSION_MULTI) &&
19383 	     btf_id_set_contains(&noreturn_deny, btf_id))
19384 		return -EINVAL;
19385 	/* Check denied attachment. */
19386 	if (btf_id_set_contains(&btf_id_deny, btf_id))
19387 		return -EINVAL;
19388 
19389 	/* Check and get function target data. */
19390 	t = btf_type_by_id(btf, btf_id);
19391 	if (!t)
19392 		return -EINVAL;
19393 	tname = btf_name_by_offset(btf, t->name_off);
19394 	if (!tname)
19395 		return -EINVAL;
19396 	if (!btf_type_is_func(t))
19397 		return -EINVAL;
19398 	t = btf_type_by_id(btf, t->type);
19399 	if (!btf_type_is_func_proto(t))
19400 		return -EINVAL;
19401 	err = btf_distill_func_proto(NULL, btf, t, tname, &tgt_info->fmodel);
19402 	if (err < 0)
19403 		return err;
19404 	if (btf_is_module(btf)) {
19405 		/* The bpf program already holds reference to module. */
19406 		if (WARN_ON_ONCE(!prog->aux->mod))
19407 			return -EINVAL;
19408 		addr = find_kallsyms_symbol_value(prog->aux->mod, tname);
19409 	} else {
19410 		addr = kallsyms_lookup_name(tname);
19411 	}
19412 	if (!addr || !ftrace_location(addr))
19413 		return -ENOENT;
19414 
19415 	/* Check sleepable program attachment. */
19416 	if (prog->sleepable) {
19417 		err = btf_id_allow_sleepable(btf_id, addr, prog, btf);
19418 		if (err)
19419 			return err;
19420 	}
19421 	tgt_info->tgt_addr = addr;
19422 	return 0;
19423 }
19424 
19425 struct btf *bpf_get_btf_vmlinux(void)
19426 {
19427 	if (!btf_vmlinux && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) {
19428 		mutex_lock(&bpf_verifier_lock);
19429 		if (!btf_vmlinux)
19430 			btf_vmlinux = btf_parse_vmlinux();
19431 		mutex_unlock(&bpf_verifier_lock);
19432 	}
19433 	return btf_vmlinux;
19434 }
19435 
19436 /*
19437  * The add_fd_from_fd_array() is executed only if fd_array_cnt is non-zero. In
19438  * this case expect that every file descriptor in the array is either a map or
19439  * a BTF. Everything else is considered to be trash.
19440  */
19441 static int add_fd_from_fd_array(struct bpf_verifier_env *env, int fd)
19442 {
19443 	struct bpf_map *map;
19444 	struct btf *btf;
19445 	CLASS(fd, f)(fd);
19446 	int err;
19447 
19448 	map = __bpf_map_get(f);
19449 	if (!IS_ERR(map)) {
19450 		err = __add_used_map(env, map);
19451 		if (err < 0)
19452 			return err;
19453 		return 0;
19454 	}
19455 
19456 	btf = __btf_get_by_fd(f);
19457 	if (!IS_ERR(btf)) {
19458 		btf_get(btf);
19459 		return __add_used_btf(env, btf);
19460 	}
19461 
19462 	verbose(env, "fd %d is not pointing to valid bpf_map or btf\n", fd);
19463 	return PTR_ERR(map);
19464 }
19465 
19466 static int process_fd_array(struct bpf_verifier_env *env, union bpf_attr *attr, bpfptr_t uattr)
19467 {
19468 	size_t size = sizeof(int);
19469 	int ret;
19470 	int fd;
19471 	u32 i;
19472 
19473 	env->fd_array = make_bpfptr(attr->fd_array, uattr.is_kernel);
19474 
19475 	/*
19476 	 * The only difference between old (no fd_array_cnt is given) and new
19477 	 * APIs is that in the latter case the fd_array is expected to be
19478 	 * continuous and is scanned for map fds right away
19479 	 */
19480 	if (!attr->fd_array_cnt)
19481 		return 0;
19482 
19483 	/* Check for integer overflow */
19484 	if (attr->fd_array_cnt >= (U32_MAX / size)) {
19485 		verbose(env, "fd_array_cnt is too big (%u)\n", attr->fd_array_cnt);
19486 		return -EINVAL;
19487 	}
19488 
19489 	for (i = 0; i < attr->fd_array_cnt; i++) {
19490 		if (copy_from_bpfptr_offset(&fd, env->fd_array, i * size, size))
19491 			return -EFAULT;
19492 
19493 		ret = add_fd_from_fd_array(env, fd);
19494 		if (ret)
19495 			return ret;
19496 	}
19497 
19498 	return 0;
19499 }
19500 
19501 /* replace a generic kfunc with a specialized version if necessary */
19502 static int specialize_kfunc(struct bpf_verifier_env *env, struct bpf_kfunc_desc *desc, int insn_idx)
19503 {
19504 	struct bpf_prog *prog = env->prog;
19505 	bool seen_direct_write;
19506 	void *xdp_kfunc;
19507 	bool is_rdonly;
19508 	u32 func_id = desc->func_id;
19509 	u16 offset = desc->offset;
19510 	unsigned long addr = desc->addr;
19511 
19512 	if (offset) /* return if module BTF is used */
19513 		return 0;
19514 
19515 	if (bpf_dev_bound_kfunc_id(func_id)) {
19516 		xdp_kfunc = bpf_dev_bound_resolve_kfunc(prog, func_id);
19517 		if (xdp_kfunc)
19518 			addr = (unsigned long)xdp_kfunc;
19519 		/* fallback to default kfunc when not supported by netdev */
19520 	} else if (func_id == special_kfunc_list[KF_bpf_dynptr_from_skb]) {
19521 		seen_direct_write = env->seen_direct_write;
19522 		is_rdonly = !may_access_direct_pkt_data(env, NULL, BPF_WRITE);
19523 
19524 		if (is_rdonly)
19525 			addr = (unsigned long)bpf_dynptr_from_skb_rdonly;
19526 
19527 		/* restore env->seen_direct_write to its original value, since
19528 		 * may_access_direct_pkt_data mutates it
19529 		 */
19530 		env->seen_direct_write = seen_direct_write;
19531 	} else if (func_id == special_kfunc_list[KF_bpf_set_dentry_xattr]) {
19532 		if (bpf_lsm_has_d_inode_locked(prog))
19533 			addr = (unsigned long)bpf_set_dentry_xattr_locked;
19534 	} else if (func_id == special_kfunc_list[KF_bpf_remove_dentry_xattr]) {
19535 		if (bpf_lsm_has_d_inode_locked(prog))
19536 			addr = (unsigned long)bpf_remove_dentry_xattr_locked;
19537 	} else if (func_id == special_kfunc_list[KF_bpf_dynptr_from_file]) {
19538 		if (!env->insn_aux_data[insn_idx].non_sleepable)
19539 			addr = (unsigned long)bpf_dynptr_from_file_sleepable;
19540 	} else if (func_id == special_kfunc_list[KF_bpf_arena_alloc_pages]) {
19541 		if (env->insn_aux_data[insn_idx].non_sleepable)
19542 			addr = (unsigned long)bpf_arena_alloc_pages_non_sleepable;
19543 	} else if (func_id == special_kfunc_list[KF_bpf_arena_free_pages]) {
19544 		if (env->insn_aux_data[insn_idx].non_sleepable)
19545 			addr = (unsigned long)bpf_arena_free_pages_non_sleepable;
19546 	}
19547 	desc->addr = addr;
19548 	return 0;
19549 }
19550 
19551 static void __fixup_collection_insert_kfunc(struct bpf_insn_aux_data *insn_aux,
19552 					    u16 struct_meta_reg,
19553 					    u16 node_offset_reg,
19554 					    struct bpf_insn *insn,
19555 					    struct bpf_insn *insn_buf,
19556 					    int *cnt)
19557 {
19558 	struct btf_struct_meta *kptr_struct_meta = insn_aux->kptr_struct_meta;
19559 	struct bpf_insn addr[2] = { BPF_LD_IMM64(struct_meta_reg, (long)kptr_struct_meta) };
19560 
19561 	insn_buf[0] = addr[0];
19562 	insn_buf[1] = addr[1];
19563 	insn_buf[2] = BPF_MOV64_IMM(node_offset_reg, insn_aux->insert_off);
19564 	insn_buf[3] = *insn;
19565 	*cnt = 4;
19566 }
19567 
19568 int bpf_fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
19569 		     struct bpf_insn *insn_buf, int insn_idx, int *cnt)
19570 {
19571 	struct bpf_kfunc_desc *desc;
19572 	int err;
19573 
19574 	if (!insn->imm) {
19575 		verbose(env, "invalid kernel function call not eliminated in verifier pass\n");
19576 		return -EINVAL;
19577 	}
19578 
19579 	*cnt = 0;
19580 
19581 	/* insn->imm has the btf func_id. Replace it with an offset relative to
19582 	 * __bpf_call_base, unless the JIT needs to call functions that are
19583 	 * further than 32 bits away (bpf_jit_supports_far_kfunc_call()).
19584 	 */
19585 	desc = find_kfunc_desc(env->prog, insn->imm, insn->off);
19586 	if (!desc) {
19587 		verifier_bug(env, "kernel function descriptor not found for func_id %u",
19588 			     insn->imm);
19589 		return -EFAULT;
19590 	}
19591 
19592 	err = specialize_kfunc(env, desc, insn_idx);
19593 	if (err)
19594 		return err;
19595 
19596 	if (!bpf_jit_supports_far_kfunc_call())
19597 		insn->imm = BPF_CALL_IMM(desc->addr);
19598 
19599 	if (is_bpf_obj_new_kfunc(desc->func_id) || is_bpf_percpu_obj_new_kfunc(desc->func_id)) {
19600 		struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
19601 		struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) };
19602 		u64 obj_new_size = env->insn_aux_data[insn_idx].obj_new_size;
19603 
19604 		if (is_bpf_percpu_obj_new_kfunc(desc->func_id) && kptr_struct_meta) {
19605 			verifier_bug(env, "NULL kptr_struct_meta expected at insn_idx %d",
19606 				     insn_idx);
19607 			return -EFAULT;
19608 		}
19609 
19610 		insn_buf[0] = BPF_MOV64_IMM(BPF_REG_1, obj_new_size);
19611 		insn_buf[1] = addr[0];
19612 		insn_buf[2] = addr[1];
19613 		insn_buf[3] = *insn;
19614 		*cnt = 4;
19615 	} else if (is_bpf_obj_drop_kfunc(desc->func_id) ||
19616 		   is_bpf_percpu_obj_drop_kfunc(desc->func_id) ||
19617 		   is_bpf_refcount_acquire_kfunc(desc->func_id)) {
19618 		struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
19619 		struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) };
19620 
19621 		if (is_bpf_percpu_obj_drop_kfunc(desc->func_id) && kptr_struct_meta) {
19622 			verifier_bug(env, "NULL kptr_struct_meta expected at insn_idx %d",
19623 				     insn_idx);
19624 			return -EFAULT;
19625 		}
19626 
19627 		if (is_bpf_refcount_acquire_kfunc(desc->func_id) && !kptr_struct_meta) {
19628 			verifier_bug(env, "kptr_struct_meta expected at insn_idx %d",
19629 				     insn_idx);
19630 			return -EFAULT;
19631 		}
19632 
19633 		insn_buf[0] = addr[0];
19634 		insn_buf[1] = addr[1];
19635 		insn_buf[2] = *insn;
19636 		*cnt = 3;
19637 	} else if (is_bpf_list_push_kfunc(desc->func_id) ||
19638 		   is_bpf_rbtree_add_kfunc(desc->func_id)) {
19639 		struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
19640 		int struct_meta_reg = BPF_REG_3;
19641 		int node_offset_reg = BPF_REG_4;
19642 
19643 		/* list_add/rbtree_add have an extra arg (prev/less),
19644 		 * so args-to-fixup are in diff regs.
19645 		 */
19646 		if (desc->func_id == special_kfunc_list[KF_bpf_list_add] ||
19647 		    is_bpf_rbtree_add_kfunc(desc->func_id)) {
19648 			struct_meta_reg = BPF_REG_4;
19649 			node_offset_reg = BPF_REG_5;
19650 		}
19651 
19652 		if (!kptr_struct_meta) {
19653 			verifier_bug(env, "kptr_struct_meta expected at insn_idx %d",
19654 				     insn_idx);
19655 			return -EFAULT;
19656 		}
19657 
19658 		__fixup_collection_insert_kfunc(&env->insn_aux_data[insn_idx], struct_meta_reg,
19659 						node_offset_reg, insn, insn_buf, cnt);
19660 	} else if (desc->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx] ||
19661 		   desc->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
19662 		insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1);
19663 		*cnt = 1;
19664 	} else if (desc->func_id == special_kfunc_list[KF_bpf_session_is_return] &&
19665 		   (env->prog->expected_attach_type == BPF_TRACE_FSESSION ||
19666 		    env->prog->expected_attach_type == BPF_TRACE_FSESSION_MULTI)) {
19667 
19668 		/*
19669 		 * inline the bpf_session_is_return() for fsession:
19670 		 *   bool bpf_session_is_return(void *ctx)
19671 		 *   {
19672 		 *       return (((u64 *)ctx)[-1] >> BPF_TRAMP_IS_RETURN_SHIFT) & 1;
19673 		 *   }
19674 		 */
19675 		insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
19676 		insn_buf[1] = BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, BPF_TRAMP_IS_RETURN_SHIFT);
19677 		insn_buf[2] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1);
19678 		*cnt = 3;
19679 	} else if (desc->func_id == special_kfunc_list[KF_bpf_session_cookie] &&
19680 		   (env->prog->expected_attach_type == BPF_TRACE_FSESSION ||
19681 		    env->prog->expected_attach_type == BPF_TRACE_FSESSION_MULTI)) {
19682 		/*
19683 		 * inline bpf_session_cookie() for fsession:
19684 		 *   __u64 *bpf_session_cookie(void *ctx)
19685 		 *   {
19686 		 *       u64 off = (((u64 *)ctx)[-1] >> BPF_TRAMP_COOKIE_INDEX_SHIFT) & 0xFF;
19687 		 *       return &((u64 *)ctx)[-off];
19688 		 *   }
19689 		 */
19690 		insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
19691 		insn_buf[1] = BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, BPF_TRAMP_COOKIE_INDEX_SHIFT);
19692 		insn_buf[2] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xFF);
19693 		insn_buf[3] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 3);
19694 		insn_buf[4] = BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1);
19695 		insn_buf[5] = BPF_ALU64_IMM(BPF_NEG, BPF_REG_0, 0);
19696 		*cnt = 6;
19697 	}
19698 
19699 	if (env->insn_aux_data[insn_idx].arg_prog) {
19700 		u32 regno = env->insn_aux_data[insn_idx].arg_prog;
19701 		struct bpf_insn ld_addrs[2] = { BPF_LD_IMM64(regno, (long)env->prog->aux) };
19702 		int idx = *cnt;
19703 
19704 		insn_buf[idx++] = ld_addrs[0];
19705 		insn_buf[idx++] = ld_addrs[1];
19706 		insn_buf[idx++] = *insn;
19707 		*cnt = idx;
19708 	}
19709 	return 0;
19710 }
19711 
19712 int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr,
19713 	      struct bpf_log_attr *attr_log)
19714 {
19715 	u64 start_time = ktime_get_ns();
19716 	struct bpf_verifier_env *env;
19717 	int i, len, ret = -EINVAL, err;
19718 	bool is_priv;
19719 
19720 	BTF_TYPE_EMIT(enum bpf_features);
19721 
19722 	/* no program is valid */
19723 	if (ARRAY_SIZE(bpf_verifier_ops) == 0)
19724 		return -EINVAL;
19725 
19726 	/* 'struct bpf_verifier_env' can be global, but since it's not small,
19727 	 * allocate/free it every time bpf_check() is called
19728 	 */
19729 	env = kvzalloc_obj(struct bpf_verifier_env, GFP_KERNEL_ACCOUNT);
19730 	if (!env)
19731 		return -ENOMEM;
19732 
19733 	env->bt.env = env;
19734 
19735 	len = (*prog)->len;
19736 	env->insn_aux_data =
19737 		vzalloc(array_size(sizeof(struct bpf_insn_aux_data), len));
19738 	ret = -ENOMEM;
19739 	if (!env->insn_aux_data)
19740 		goto err_free_env;
19741 	for (i = 0; i < len; i++)
19742 		env->insn_aux_data[i].orig_idx = i;
19743 	env->succ = bpf_iarray_realloc(NULL, 2);
19744 	if (!env->succ)
19745 		goto err_free_env;
19746 	env->prog = *prog;
19747 	env->ops = bpf_verifier_ops[env->prog->type];
19748 
19749 	env->allow_ptr_leaks = bpf_allow_ptr_leaks(env->prog->aux->token);
19750 	env->allow_uninit_stack = bpf_allow_uninit_stack(env->prog->aux->token);
19751 	env->bypass_spec_v1 = bpf_bypass_spec_v1(env->prog->aux->token);
19752 	env->bypass_spec_v4 = bpf_bypass_spec_v4(env->prog->aux->token);
19753 	env->bpf_capable = is_priv = bpf_token_capable(env->prog->aux->token, CAP_BPF);
19754 
19755 	bpf_get_btf_vmlinux();
19756 
19757 	/* grab the mutex to protect few globals used by verifier */
19758 	if (!is_priv)
19759 		mutex_lock(&bpf_verifier_lock);
19760 
19761 	/* user could have requested verbose verifier output
19762 	 * and supplied buffer to store the verification trace
19763 	 */
19764 	ret = bpf_vlog_init(&env->log, attr_log->level, attr_log->ubuf, attr_log->size);
19765 	if (ret)
19766 		goto err_unlock;
19767 
19768 	ret = process_fd_array(env, attr, uattr);
19769 	if (ret)
19770 		goto skip_full_check;
19771 
19772 	mark_verifier_state_clean(env);
19773 
19774 	if (IS_ERR(btf_vmlinux)) {
19775 		/* Either gcc or pahole or kernel are broken. */
19776 		verbose(env, "in-kernel BTF is malformed\n");
19777 		ret = PTR_ERR(btf_vmlinux);
19778 		goto skip_full_check;
19779 	}
19780 
19781 	env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT);
19782 	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
19783 		env->strict_alignment = true;
19784 	if (attr->prog_flags & BPF_F_ANY_ALIGNMENT)
19785 		env->strict_alignment = false;
19786 
19787 	if (is_priv)
19788 		env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ;
19789 	env->test_reg_invariants = attr->prog_flags & BPF_F_TEST_REG_INVARIANTS;
19790 
19791 	env->explored_states = kvzalloc_objs(struct list_head,
19792 					     state_htab_size(env),
19793 					     GFP_KERNEL_ACCOUNT);
19794 	ret = -ENOMEM;
19795 	if (!env->explored_states)
19796 		goto skip_full_check;
19797 
19798 	for (i = 0; i < state_htab_size(env); i++)
19799 		INIT_LIST_HEAD(&env->explored_states[i]);
19800 	INIT_LIST_HEAD(&env->free_list);
19801 
19802 	ret = bpf_check_btf_info_early(env, attr, uattr);
19803 	if (ret < 0)
19804 		goto skip_full_check;
19805 
19806 	ret = add_subprog_and_kfunc(env);
19807 	if (ret < 0)
19808 		goto skip_full_check;
19809 
19810 	ret = check_subprogs(env);
19811 	if (ret < 0)
19812 		goto skip_full_check;
19813 
19814 	ret = bpf_check_btf_info(env, attr, uattr);
19815 	if (ret < 0)
19816 		goto skip_full_check;
19817 
19818 	ret = check_and_resolve_insns(env);
19819 	if (ret < 0)
19820 		goto skip_full_check;
19821 
19822 	if (bpf_prog_is_offloaded(env->prog->aux)) {
19823 		ret = bpf_prog_offload_verifier_prep(env->prog);
19824 		if (ret)
19825 			goto skip_full_check;
19826 	}
19827 
19828 	ret = bpf_check_cfg(env);
19829 	if (ret < 0)
19830 		goto skip_full_check;
19831 
19832 	ret = bpf_compute_postorder(env);
19833 	if (ret < 0)
19834 		goto skip_full_check;
19835 
19836 	ret = bpf_stack_liveness_init(env);
19837 	if (ret)
19838 		goto skip_full_check;
19839 
19840 	ret = check_attach_btf_id(env);
19841 	if (ret)
19842 		goto skip_full_check;
19843 
19844 	ret = bpf_compute_const_regs(env);
19845 	if (ret < 0)
19846 		goto skip_full_check;
19847 
19848 	ret = bpf_prune_dead_branches(env);
19849 	if (ret < 0)
19850 		goto skip_full_check;
19851 
19852 	ret = sort_subprogs_topo(env);
19853 	if (ret < 0)
19854 		goto skip_full_check;
19855 
19856 	ret = bpf_compute_scc(env);
19857 	if (ret < 0)
19858 		goto skip_full_check;
19859 
19860 	ret = bpf_compute_live_registers(env);
19861 	if (ret < 0)
19862 		goto skip_full_check;
19863 
19864 	ret = mark_fastcall_patterns(env);
19865 	if (ret < 0)
19866 		goto skip_full_check;
19867 
19868 	ret = do_check_main(env);
19869 	ret = ret ?: do_check_subprogs(env);
19870 
19871 	if (ret == 0 && bpf_prog_is_offloaded(env->prog->aux))
19872 		ret = bpf_prog_offload_finalize(env);
19873 
19874 skip_full_check:
19875 	kvfree(env->explored_states);
19876 
19877 	/* might decrease stack depth, keep it before passes that
19878 	 * allocate additional slots.
19879 	 */
19880 	if (ret == 0)
19881 		ret = bpf_remove_fastcall_spills_fills(env);
19882 
19883 	if (ret == 0)
19884 		ret = check_max_stack_depth(env);
19885 
19886 	/* instruction rewrites happen after this point */
19887 	if (ret == 0)
19888 		ret = bpf_optimize_bpf_loop(env);
19889 
19890 	if (is_priv) {
19891 		if (ret == 0)
19892 			bpf_opt_hard_wire_dead_code_branches(env);
19893 		if (ret == 0)
19894 			ret = bpf_opt_remove_dead_code(env);
19895 		if (ret == 0)
19896 			ret = bpf_opt_remove_nops(env);
19897 	} else {
19898 		if (ret == 0)
19899 			sanitize_dead_code(env);
19900 	}
19901 
19902 	if (ret == 0)
19903 		/* program is valid, convert *(u32*)(ctx + off) accesses */
19904 		ret = bpf_convert_ctx_accesses(env);
19905 
19906 	if (ret == 0)
19907 		ret = bpf_do_misc_fixups(env);
19908 
19909 	/* do 32-bit optimization after insn patching has done so those patched
19910 	 * insns could be handled correctly.
19911 	 */
19912 	if (ret == 0 && !bpf_prog_is_offloaded(env->prog->aux)) {
19913 		ret = bpf_opt_subreg_zext_lo32_rnd_hi32(env, attr);
19914 		env->prog->aux->verifier_zext = bpf_jit_needs_zext() ? !ret
19915 								     : false;
19916 	}
19917 
19918 	if (ret == 0)
19919 		ret = bpf_fixup_call_args(env);
19920 
19921 	env->verification_time = ktime_get_ns() - start_time;
19922 	print_verification_stats(env);
19923 	env->prog->aux->verified_insns = env->insn_processed;
19924 
19925 	/* preserve original error even if log finalization is successful */
19926 	err = bpf_log_attr_finalize(attr_log, &env->log);
19927 	if (err)
19928 		ret = err;
19929 
19930 	if (ret)
19931 		goto err_release_maps;
19932 
19933 	if (env->used_map_cnt) {
19934 		/* if program passed verifier, update used_maps in bpf_prog_info */
19935 		env->prog->aux->used_maps = kmalloc_objs(env->used_maps[0],
19936 							 env->used_map_cnt,
19937 							 GFP_KERNEL_ACCOUNT);
19938 
19939 		if (!env->prog->aux->used_maps) {
19940 			ret = -ENOMEM;
19941 			goto err_release_maps;
19942 		}
19943 
19944 		memcpy(env->prog->aux->used_maps, env->used_maps,
19945 		       sizeof(env->used_maps[0]) * env->used_map_cnt);
19946 		env->prog->aux->used_map_cnt = env->used_map_cnt;
19947 	}
19948 	if (env->used_btf_cnt) {
19949 		/* if program passed verifier, update used_btfs in bpf_prog_aux */
19950 		env->prog->aux->used_btfs = kmalloc_objs(env->used_btfs[0],
19951 							 env->used_btf_cnt,
19952 							 GFP_KERNEL_ACCOUNT);
19953 		if (!env->prog->aux->used_btfs) {
19954 			ret = -ENOMEM;
19955 			goto err_release_maps;
19956 		}
19957 
19958 		memcpy(env->prog->aux->used_btfs, env->used_btfs,
19959 		       sizeof(env->used_btfs[0]) * env->used_btf_cnt);
19960 		env->prog->aux->used_btf_cnt = env->used_btf_cnt;
19961 	}
19962 	if (env->used_map_cnt || env->used_btf_cnt) {
19963 		/* program is valid. Convert pseudo bpf_ld_imm64 into generic
19964 		 * bpf_ld_imm64 instructions
19965 		 */
19966 		convert_pseudo_ld_imm64(env);
19967 	}
19968 
19969 	adjust_btf_func(env);
19970 
19971 	/* extension progs temporarily inherit the attach_type of their targets
19972 	   for verification purposes, so set it back to zero before returning
19973 	 */
19974 	if (env->prog->type == BPF_PROG_TYPE_EXT)
19975 		env->prog->expected_attach_type = 0;
19976 
19977 	env->prog = __bpf_prog_select_runtime(env, env->prog, &ret);
19978 
19979 err_release_maps:
19980 	if (ret)
19981 		release_insn_arrays(env);
19982 	if (!env->prog->aux->used_maps)
19983 		/* if we didn't copy map pointers into bpf_prog_info, release
19984 		 * them now. Otherwise free_used_maps() will release them.
19985 		 */
19986 		release_maps(env);
19987 	if (!env->prog->aux->used_btfs)
19988 		release_btfs(env);
19989 
19990 	*prog = env->prog;
19991 
19992 	module_put(env->attach_btf_mod);
19993 err_unlock:
19994 	if (!is_priv)
19995 		mutex_unlock(&bpf_verifier_lock);
19996 	bpf_clear_insn_aux_data(env, 0, env->prog->len);
19997 	vfree(env->insn_aux_data);
19998 err_free_env:
19999 	bpf_stack_liveness_free(env);
20000 	kvfree(env->cfg.insn_postorder);
20001 	kvfree(env->scc_info);
20002 	kvfree(env->succ);
20003 	kvfree(env->gotox_tmp_buf);
20004 	kvfree(env);
20005 	return ret;
20006 }
20007