xref: /linux/kernel/bpf/verifier.c (revision 2148794eeaf2a898adc791e9472eb80ea55984da)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
3  * Copyright (c) 2016 Facebook
4  * Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
5  */
6 #include <uapi/linux/btf.h>
7 #include <linux/bpf-cgroup.h>
8 #include <linux/kernel.h>
9 #include <linux/types.h>
10 #include <linux/slab.h>
11 #include <linux/bpf.h>
12 #include <linux/btf.h>
13 #include <linux/bpf_verifier.h>
14 #include <linux/filter.h>
15 #include <net/netlink.h>
16 #include <linux/file.h>
17 #include <linux/vmalloc.h>
18 #include <linux/stringify.h>
19 #include <linux/bsearch.h>
20 #include <linux/sort.h>
21 #include <linux/perf_event.h>
22 #include <linux/ctype.h>
23 #include <linux/error-injection.h>
24 #include <linux/bpf_lsm.h>
25 #include <linux/btf_ids.h>
26 #include <linux/poison.h>
27 #include <linux/module.h>
28 #include <linux/cpumask.h>
29 #include <linux/cnum.h>
30 #include <linux/bpf_mem_alloc.h>
31 #include <net/xdp.h>
32 #include <linux/trace_events.h>
33 #include <linux/kallsyms.h>
34 
35 #include "disasm.h"
36 
/*
 * Table of verifier ops, indexed by the _id given to BPF_PROG_TYPE().
 * The entries are generated by including <linux/bpf_types.h> with
 * BPF_PROG_TYPE() expanding to a designated initializer that points at
 * <_name>_verifier_ops; BPF_MAP_TYPE() and BPF_LINK_TYPE() expand to
 * nothing so only program types contribute entries.
 */
static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
	[_id] = & _name ## _verifier_ops,
#define BPF_MAP_TYPE(_id, _ops)
#define BPF_LINK_TYPE(_id, _name)
#include <linux/bpf_types.h>
#undef BPF_PROG_TYPE
#undef BPF_MAP_TYPE
#undef BPF_LINK_TYPE
};
47 
/*
 * Feature identifiers with explicit numeric values; __MAX_BPF_FEAT is one
 * past the last feature.
 * NOTE(review): how these values are consumed is not visible in this part
 * of the file - confirm at the use sites.
 */
enum bpf_features {
	BPF_FEAT_RDONLY_CAST_TO_VOID = 0,
	BPF_FEAT_STREAMS	     = 1,
	__MAX_BPF_FEAT,
};
53 
/*
 * bpf_global_percpu_ma has external linkage, bpf_global_percpu_ma_set is
 * file-local.
 * NOTE(review): presumably the flag records whether the allocator has been
 * initialized - confirm against the code that sets it.
 */
struct bpf_mem_alloc bpf_global_percpu_ma;
static bool bpf_global_percpu_ma_set;
56 
57 /* bpf_check() is a static code analyzer that walks eBPF program
58  * instruction by instruction and updates register/stack state.
59  * All paths of conditional branches are analyzed until 'bpf_exit' insn.
60  *
61  * The first pass is depth-first-search to check that the program is a DAG.
62  * It rejects the following programs:
63  * - larger than BPF_MAXINSNS insns
64  * - if loop is present (detected via back-edge)
65  * - unreachable insns exist (shouldn't be a forest. program = one function)
66  * - out of bounds or malformed jumps
67  * The second pass is all possible path descent from the 1st insn.
68  * Since it's analyzing all paths through the program, the length of the
69  * analysis is limited to 64k insn, which may be hit even if total number of
 * insn is less than 4K, but there are too many branches that change stack/regs.
71  * Number of 'branches to be analyzed' is limited to 1k
72  *
73  * On entry to each instruction, each register has a type, and the instruction
74  * changes the types of the registers depending on instruction semantics.
75  * If instruction is BPF_MOV64_REG(BPF_REG_1, BPF_REG_5), then type of R5 is
76  * copied to R1.
77  *
78  * All registers are 64-bit.
79  * R0 - return register
80  * R1-R5 argument passing registers
81  * R6-R9 callee saved registers
82  * R10 - frame pointer read-only
83  *
84  * At the start of BPF program the register R1 contains a pointer to bpf_context
85  * and has type PTR_TO_CTX.
86  *
87  * Verifier tracks arithmetic operations on pointers in case:
88  *    BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
89  *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -20),
90  * 1st insn copies R10 (which has FRAME_PTR) type into R1
91  * and 2nd arithmetic instruction is pattern matched to recognize
92  * that it wants to construct a pointer to some element within stack.
93  * So after 2nd insn, the register R1 has type PTR_TO_STACK
94  * (and -20 constant is saved for further stack bounds checking).
95  * Meaning that this reg is a pointer to stack plus known immediate constant.
96  *
97  * Most of the time the registers have SCALAR_VALUE type, which
98  * means the register has some value, but it's not a valid pointer.
99  * (like pointer plus pointer becomes SCALAR_VALUE type)
100  *
101  * When verifier sees load or store instructions the type of base register
102  * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK, PTR_TO_SOCKET. These are
103  * four pointer types recognized by check_mem_access() function.
104  *
105  * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
106  * and the range of [ptr, ptr + map's value_size) is accessible.
107  *
108  * registers used to pass values to function calls are checked against
109  * function argument constraints.
110  *
111  * ARG_PTR_TO_MAP_KEY is one of such argument constraints.
112  * It means that the register type passed to this function must be
113  * PTR_TO_STACK and it will be used inside the function as
114  * 'pointer to map element key'
115  *
116  * For example the argument constraints for bpf_map_lookup_elem():
117  *   .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
118  *   .arg1_type = ARG_CONST_MAP_PTR,
119  *   .arg2_type = ARG_PTR_TO_MAP_KEY,
120  *
121  * ret_type says that this function returns 'pointer to map elem value or null'
122  * function expects 1st argument to be a const pointer to 'struct bpf_map' and
123  * 2nd argument should be a pointer to stack, which will be used inside
124  * the helper function as a pointer to map element key.
125  *
126  * On the kernel side the helper function looks like:
127  * u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
128  * {
129  *    struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
130  *    void *key = (void *) (unsigned long) r2;
131  *    void *value;
132  *
133  *    here kernel can access 'key' and 'map' pointers safely, knowing that
134  *    [key, key + map->key_size) bytes are valid and were initialized on
135  *    the stack of eBPF program.
136  * }
137  *
138  * Corresponding eBPF program may look like:
139  *    BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),  // after this insn R2 type is FRAME_PTR
140  *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), // after this insn R2 type is PTR_TO_STACK
141  *    BPF_LD_MAP_FD(BPF_REG_1, map_fd),      // after this insn R1 type is CONST_PTR_TO_MAP
142  *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
143  * here verifier looks at prototype of map_lookup_elem() and sees:
144  * .arg1_type == ARG_CONST_MAP_PTR and R1->type == CONST_PTR_TO_MAP, which is ok,
145  * Now verifier knows that this map has key of R1->map_ptr->key_size bytes
146  *
147  * Then .arg2_type == ARG_PTR_TO_MAP_KEY and R2->type == PTR_TO_STACK, ok so far,
148  * Now verifier checks that [R2, R2 + map's key_size) are within stack limits
149  * and were initialized prior to this call.
150  * If it's ok, then verifier allows this BPF_CALL insn and looks at
151  * .ret_type which is RET_PTR_TO_MAP_VALUE_OR_NULL, so it sets
152  * R0->type = PTR_TO_MAP_VALUE_OR_NULL which means bpf_map_lookup_elem() function
153  * returns either pointer to map value or NULL.
154  *
155  * When type PTR_TO_MAP_VALUE_OR_NULL passes through 'if (reg != 0) goto +off'
156  * insn, the register holding that pointer in the true branch changes state to
157  * PTR_TO_MAP_VALUE and the same register changes state to CONST_IMM in the false
158  * branch. See check_cond_jmp_op().
159  *
160  * After the call R0 is set to return type of the function and registers R1-R5
161  * are set to NOT_INIT to indicate that they are no longer readable.
162  *
163  * The following reference types represent a potential reference to a kernel
164  * resource which, after first being allocated, must be checked and freed by
165  * the BPF program:
166  * - PTR_TO_SOCKET_OR_NULL, PTR_TO_SOCKET
167  *
168  * When the verifier sees a helper call return a reference type, it allocates a
169  * pointer id for the reference and stores it in the current function state.
170  * Similar to the way that PTR_TO_MAP_VALUE_OR_NULL is converted into
171  * PTR_TO_MAP_VALUE, PTR_TO_SOCKET_OR_NULL becomes PTR_TO_SOCKET when the type
172  * passes through a NULL-check conditional. For the branch wherein the state is
173  * changed to CONST_IMM, the verifier releases the reference.
174  *
175  * For each helper function that allocates a reference, such as
176  * bpf_sk_lookup_tcp(), there is a corresponding release function, such as
177  * bpf_sk_release(). When a reference type passes into the release function,
178  * the verifier also releases the reference. If any unchecked or unreleased
179  * reference remains at the end of the program, the verifier rejects it.
180  */
181 
/* verifier_state + insn_idx are pushed to stack when branch is encountered */
struct bpf_verifier_stack_elem {
	/* verifier state is 'st'
	 * before processing instruction 'insn_idx'
	 * and after processing instruction 'prev_insn_idx'
	 */
	struct bpf_verifier_state st;
	int insn_idx;
	int prev_insn_idx;
	/* link to another element of the same type (singly linked) */
	struct bpf_verifier_stack_elem *next;
	/* length of verifier log at the time this state was pushed on stack */
	u32 log_pos;
};
195 
/*
 * Complexity limits.
 * NOTE(review): enforced outside this part of the file; the meaning is
 * inferred from the names only - confirm at the use sites.
 */
#define BPF_COMPLEXITY_LIMIT_JMP_SEQ	8192
#define BPF_COMPLEXITY_LIMIT_STATES	64

/* NOTE(review): presumably a size cap tied to bpf_global_percpu_ma - confirm. */
#define BPF_GLOBAL_PERCPU_MA_MAX_SIZE  512

/* NOTE(review): presumably a minimum size related to private stacks - confirm. */
#define BPF_PRIV_STACK_MIN_SIZE		64
202 
/*
 * Forward declarations of file-local helpers that are referenced before
 * their definitions appear.
 */
static int acquire_reference(struct bpf_verifier_env *env, int insn_idx, int parent_id);
static int release_reference_nomark(struct bpf_verifier_state *state, int id);
static int release_reference(struct bpf_verifier_env *env, int id);
static void invalidate_non_owning_refs(struct bpf_verifier_env *env);
static bool in_rbtree_lock_required_cb(struct bpf_verifier_env *env);
static bool is_tracing_prog_type(enum bpf_prog_type type);
static int ref_set_non_owning(struct bpf_verifier_env *env,
			      struct bpf_reg_state *reg);
static bool is_trusted_reg(struct bpf_verifier_env *env, const struct bpf_reg_state *reg);
static inline bool in_sleepable_context(struct bpf_verifier_env *env);
static const char *non_sleepable_context_description(struct bpf_verifier_env *env);
static void scalar32_min_max_add(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg);
static void scalar_min_max_add(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg);
216 
217 static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux,
218 			      struct bpf_map *map,
219 			      bool unpriv, bool poison)
220 {
221 	unpriv |= bpf_map_ptr_unpriv(aux);
222 	aux->map_ptr_state.unpriv = unpriv;
223 	aux->map_ptr_state.poison = poison;
224 	aux->map_ptr_state.map_ptr = map;
225 }
226 
227 static void bpf_map_key_store(struct bpf_insn_aux_data *aux, u64 state)
228 {
229 	bool poisoned = bpf_map_key_poisoned(aux);
230 
231 	aux->map_key_state = state | BPF_MAP_KEY_SEEN |
232 			     (poisoned ? BPF_MAP_KEY_POISON : 0ULL);
233 }
234 
235 static void update_ref_obj(struct ref_obj_desc *ref_obj, struct bpf_reg_state *reg)
236 {
237 	ref_obj->id = reg->id;
238 	ref_obj->parent_id = reg->parent_id;
239 	ref_obj->cnt++;
240 }
241 
242 static int validate_ref_obj(struct bpf_verifier_env *env, struct ref_obj_desc *ref_obj)
243 {
244 	if (ref_obj->cnt > 1) {
245 		verifier_bug(env, "function expects only one referenced object but got %d\n",
246 			     ref_obj->cnt);
247 		return -EFAULT;
248 	}
249 
250 	return 0;
251 }
252 
/*
 * Metadata collected about the arguments of a call.
 * NOTE(review): most fields are filled in and consumed outside this part
 * of the file; only ref_obj (see update_ref_obj()/validate_ref_obj()) and
 * dynptr (see mark_stack_slots_dynptr()) have visible users here.
 */
struct bpf_call_arg_meta {
	struct bpf_map_desc map;
	struct bpf_dynptr_desc dynptr;
	struct ref_obj_desc ref_obj;
	bool raw_mode;
	bool pkt_access;
	u8 release_regno;
	int regno;
	int access_size;
	int mem_size;
	u64 msize_max_value;
	int func_id;
	struct btf *btf;
	u32 btf_id;
	struct btf *ret_btf;
	u32 ret_btf_id;
	u32 subprogno;
	struct btf_field *kptr_field;
	s64 const_map_key;
};
273 
/*
 * Description of a kfunc: the BTF object, prototype type, name, flags
 * pointer and id.
 * NOTE(review): how the fields are populated is not visible here.
 */
struct bpf_kfunc_meta {
	struct btf *btf;
	const struct btf_type *proto;
	const char *name;
	const u32 *flags;
	s32 id;
};
281 
/*
 * Globally visible struct btf pointer; assigned outside this part of the file.
 * NOTE(review): presumably the BTF of vmlinux - confirm.
 */
struct btf *btf_vmlinux;
283 
/*
 * Tagged argument number. As encoded by argno_from_reg() and
 * argno_from_arg(): a non-negative value is a register number, a negative
 * value is the negated argument number.
 */
typedef struct argno {
	int argno;
} argno_t;
287 
288 static argno_t argno_from_reg(u32 regno)
289 {
290 	return (argno_t){ .argno = regno };
291 }
292 
293 static argno_t argno_from_arg(u32 arg)
294 {
295 	return (argno_t){ .argno = -arg };
296 }
297 
298 static int reg_from_argno(argno_t a)
299 {
300 	if (a.argno >= 0)
301 		return a.argno;
302 	if (a.argno >= -MAX_BPF_FUNC_REG_ARGS)
303 		return -a.argno;
304 	return -1;
305 }
306 
307 static int arg_from_argno(argno_t a)
308 {
309 	if (a.argno < 0)
310 		return -a.argno;
311 	return -1;
312 }
313 
314 static int arg_idx_from_argno(argno_t a)
315 {
316 	return arg_from_argno(a) - 1;
317 }
318 
319 static const char *btf_type_name(const struct btf *btf, u32 id)
320 {
321 	return btf_name_by_offset(btf, btf_type_by_id(btf, id)->name_off);
322 }
323 
/*
 * File-local mutexes.
 * NOTE(review): what each one protects is not visible in this part of the
 * file - confirm at the lock/unlock sites.
 */
static DEFINE_MUTEX(bpf_verifier_lock);
static DEFINE_MUTEX(bpf_percpu_ma_lock);
326 
327 __printf(2, 3) static void verbose(void *private_data, const char *fmt, ...)
328 {
329 	struct bpf_verifier_env *env = private_data;
330 	va_list args;
331 
332 	if (!bpf_verifier_log_needed(&env->log))
333 		return;
334 
335 	va_start(args, fmt);
336 	bpf_verifier_vlog(&env->log, fmt, args);
337 	va_end(args);
338 }
339 
340 static void verbose_invalid_scalar(struct bpf_verifier_env *env,
341 				   struct bpf_reg_state *reg,
342 				   struct bpf_retval_range range, const char *ctx,
343 				   const char *reg_name)
344 {
345 	bool unknown = true;
346 
347 	verbose(env, "%s the register %s has", ctx, reg_name);
348 	if (reg_smin(reg) > S64_MIN) {
349 		verbose(env, " smin=%lld", reg_smin(reg));
350 		unknown = false;
351 	}
352 	if (reg_smax(reg) < S64_MAX) {
353 		verbose(env, " smax=%lld", reg_smax(reg));
354 		unknown = false;
355 	}
356 	if (unknown)
357 		verbose(env, " unknown scalar value");
358 	verbose(env, " should have been in [%d, %d]\n", range.minval, range.maxval);
359 }
360 
361 static bool reg_not_null(struct bpf_verifier_env *env, const struct bpf_reg_state *reg)
362 {
363 	enum bpf_reg_type type;
364 
365 	type = reg->type;
366 	if (type_may_be_null(type))
367 		return false;
368 
369 	type = base_type(type);
370 	return type == PTR_TO_SOCKET ||
371 		type == PTR_TO_TCP_SOCK ||
372 		type == PTR_TO_MAP_VALUE ||
373 		type == PTR_TO_MAP_KEY ||
374 		type == PTR_TO_SOCK_COMMON ||
375 		(type == PTR_TO_BTF_ID && is_trusted_reg(env, reg)) ||
376 		(type == PTR_TO_MEM && !(reg->type & PTR_UNTRUSTED)) ||
377 		type == CONST_PTR_TO_MAP;
378 }
379 
380 static struct btf_record *reg_btf_record(const struct bpf_reg_state *reg)
381 {
382 	struct btf_record *rec = NULL;
383 	struct btf_struct_meta *meta;
384 
385 	if (reg->type == PTR_TO_MAP_VALUE) {
386 		rec = reg->map_ptr->record;
387 	} else if (type_is_ptr_alloc_obj(reg->type)) {
388 		meta = btf_find_struct_meta(reg->btf, reg->btf_id);
389 		if (meta)
390 			rec = meta->record;
391 	}
392 	return rec;
393 }
394 
395 bool bpf_subprog_is_global(const struct bpf_verifier_env *env, int subprog)
396 {
397 	struct bpf_func_info_aux *aux = env->prog->aux->func_info_aux;
398 
399 	return aux && aux[subprog].linkage == BTF_FUNC_GLOBAL;
400 }
401 
402 static bool subprog_returns_void(struct bpf_verifier_env *env, int subprog)
403 {
404 	const struct btf_type *type, *func, *func_proto;
405 	const struct btf *btf = env->prog->aux->btf;
406 	u32 btf_id;
407 
408 	btf_id = env->prog->aux->func_info[subprog].type_id;
409 
410 	func = btf_type_by_id(btf, btf_id);
411 	if (verifier_bug_if(!func, env, "btf_id %u not found", btf_id))
412 		return false;
413 
414 	func_proto = btf_type_by_id(btf, func->type);
415 	if (!func_proto)
416 		return false;
417 
418 	type = btf_type_skip_modifiers(btf, func_proto->type, NULL);
419 	if (!type)
420 		return false;
421 
422 	return btf_type_is_void(type);
423 }
424 
425 static const char *subprog_name(const struct bpf_verifier_env *env, int subprog)
426 {
427 	struct bpf_func_info *info;
428 
429 	if (!env->prog->aux->func_info)
430 		return "";
431 
432 	info = &env->prog->aux->func_info[subprog];
433 	return btf_type_name(env->prog->aux->btf, info->type_id);
434 }
435 
436 void bpf_mark_subprog_exc_cb(struct bpf_verifier_env *env, int subprog)
437 {
438 	struct bpf_subprog_info *info = subprog_info(env, subprog);
439 
440 	info->is_cb = true;
441 	info->is_async_cb = true;
442 	info->is_exception_cb = true;
443 }
444 
445 static bool subprog_is_exc_cb(struct bpf_verifier_env *env, int subprog)
446 {
447 	return subprog_info(env, subprog)->is_exception_cb;
448 }
449 
450 static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
451 {
452 	return btf_record_has_field(reg_btf_record(reg), BPF_SPIN_LOCK | BPF_RES_SPIN_LOCK);
453 }
454 
455 static bool type_is_rdonly_mem(u32 type)
456 {
457 	return type & MEM_RDONLY;
458 }
459 
460 static bool is_acquire_function(enum bpf_func_id func_id,
461 				const struct bpf_map *map)
462 {
463 	enum bpf_map_type map_type = map ? map->map_type : BPF_MAP_TYPE_UNSPEC;
464 
465 	if (func_id == BPF_FUNC_sk_lookup_tcp ||
466 	    func_id == BPF_FUNC_sk_lookup_udp ||
467 	    func_id == BPF_FUNC_skc_lookup_tcp ||
468 	    func_id == BPF_FUNC_ringbuf_reserve ||
469 	    func_id == BPF_FUNC_kptr_xchg)
470 		return true;
471 
472 	if (func_id == BPF_FUNC_map_lookup_elem &&
473 	    (map_type == BPF_MAP_TYPE_SOCKMAP ||
474 	     map_type == BPF_MAP_TYPE_SOCKHASH))
475 		return true;
476 
477 	return false;
478 }
479 
480 static bool is_ptr_cast_function(enum bpf_func_id func_id)
481 {
482 	return func_id == BPF_FUNC_tcp_sock ||
483 		func_id == BPF_FUNC_sk_fullsock ||
484 		func_id == BPF_FUNC_skc_to_tcp_sock ||
485 		func_id == BPF_FUNC_skc_to_tcp6_sock ||
486 		func_id == BPF_FUNC_skc_to_udp6_sock ||
487 		func_id == BPF_FUNC_skc_to_mptcp_sock ||
488 		func_id == BPF_FUNC_skc_to_tcp_timewait_sock ||
489 		func_id == BPF_FUNC_skc_to_tcp_request_sock;
490 }
491 
/*
 * Forward declarations of kfunc predicates used by the callback helpers
 * below; each takes the kfunc's id as found in insn->imm.
 */
static bool is_sync_callback_calling_kfunc(u32 btf_id);
static bool is_async_callback_calling_kfunc(u32 btf_id);
static bool is_callback_calling_kfunc(u32 btf_id);

static bool is_bpf_wq_set_callback_kfunc(u32 btf_id);
static bool is_task_work_add_kfunc(u32 func_id);
498 
499 static bool is_sync_callback_calling_function(enum bpf_func_id func_id)
500 {
501 	return func_id == BPF_FUNC_for_each_map_elem ||
502 	       func_id == BPF_FUNC_find_vma ||
503 	       func_id == BPF_FUNC_loop ||
504 	       func_id == BPF_FUNC_user_ringbuf_drain;
505 }
506 
507 static bool is_async_callback_calling_function(enum bpf_func_id func_id)
508 {
509 	return func_id == BPF_FUNC_timer_set_callback;
510 }
511 
512 static bool is_callback_calling_function(enum bpf_func_id func_id)
513 {
514 	return is_sync_callback_calling_function(func_id) ||
515 	       is_async_callback_calling_function(func_id);
516 }
517 
518 bool bpf_is_sync_callback_calling_insn(struct bpf_insn *insn)
519 {
520 	return (bpf_helper_call(insn) && is_sync_callback_calling_function(insn->imm)) ||
521 	       (bpf_pseudo_kfunc_call(insn) && is_sync_callback_calling_kfunc(insn->imm));
522 }
523 
524 bool bpf_is_async_callback_calling_insn(struct bpf_insn *insn)
525 {
526 	return (bpf_helper_call(insn) && is_async_callback_calling_function(insn->imm)) ||
527 	       (bpf_pseudo_kfunc_call(insn) && is_async_callback_calling_kfunc(insn->imm));
528 }
529 
530 static bool is_async_cb_sleepable(struct bpf_verifier_env *env, struct bpf_insn *insn)
531 {
532 	/* bpf_timer callbacks are never sleepable. */
533 	if (bpf_helper_call(insn) && insn->imm == BPF_FUNC_timer_set_callback)
534 		return false;
535 
536 	/* bpf_wq and bpf_task_work callbacks are always sleepable. */
537 	if (bpf_pseudo_kfunc_call(insn) && insn->off == 0 &&
538 	    (is_bpf_wq_set_callback_kfunc(insn->imm) || is_task_work_add_kfunc(insn->imm)))
539 		return true;
540 
541 	verifier_bug(env, "unhandled async callback in is_async_cb_sleepable");
542 	return false;
543 }
544 
545 bool bpf_is_may_goto_insn(struct bpf_insn *insn)
546 {
547 	return insn->code == (BPF_JMP | BPF_JCOND) && insn->src_reg == BPF_MAY_GOTO;
548 }
549 
550 static bool is_spi_bounds_valid(struct bpf_func_state *state, int spi, int nr_slots)
551 {
552        int allocated_slots = state->allocated_stack / BPF_REG_SIZE;
553 
554        /* We need to check that slots between [spi - nr_slots + 1, spi] are
555 	* within [0, allocated_stack).
556 	*
557 	* Please note that the spi grows downwards. For example, a dynptr
558 	* takes the size of two stack slots; the first slot will be at
559 	* spi and the second slot will be at spi - 1.
560 	*/
561        return spi - nr_slots + 1 >= 0 && spi < allocated_slots;
562 }
563 
564 static int stack_slot_obj_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
565 			          const char *obj_kind, int nr_slots)
566 {
567 	int off, spi;
568 
569 	if (!tnum_is_const(reg->var_off)) {
570 		verbose(env, "%s has to be at a constant offset\n", obj_kind);
571 		return -EINVAL;
572 	}
573 
574 	off = reg->var_off.value;
575 	if (off % BPF_REG_SIZE) {
576 		verbose(env, "cannot pass in %s at an offset=%d\n", obj_kind, off);
577 		return -EINVAL;
578 	}
579 
580 	spi = bpf_get_spi(off);
581 	if (spi + 1 < nr_slots) {
582 		verbose(env, "cannot pass in %s at an offset=%d\n", obj_kind, off);
583 		return -EINVAL;
584 	}
585 
586 	if (!is_spi_bounds_valid(bpf_func(env, reg), spi, nr_slots))
587 		return -ERANGE;
588 	return spi;
589 }
590 
591 static int dynptr_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
592 {
593 	return stack_slot_obj_get_spi(env, reg, "dynptr", BPF_DYNPTR_NR_SLOTS);
594 }
595 
/*
 * Slot index of the iterator addressed by @reg, spanning @nr_slots slots;
 * negative errno as returned by stack_slot_obj_get_spi() on failure.
 */
static int iter_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg, int nr_slots)
{
	const char *kind = "iter";

	return stack_slot_obj_get_spi(env, reg, kind, nr_slots);
}
600 
/*
 * Slot index of the single-slot irq_flag addressed by @reg; negative errno
 * as returned by stack_slot_obj_get_spi() on failure.
 */
static int irq_flag_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{
	const int nr_slots = 1;

	return stack_slot_obj_get_spi(env, reg, "irq_flag", nr_slots);
}
605 
606 static enum bpf_dynptr_type arg_to_dynptr_type(enum bpf_arg_type arg_type)
607 {
608 	switch (arg_type & DYNPTR_TYPE_FLAG_MASK) {
609 	case DYNPTR_TYPE_LOCAL:
610 		return BPF_DYNPTR_TYPE_LOCAL;
611 	case DYNPTR_TYPE_RINGBUF:
612 		return BPF_DYNPTR_TYPE_RINGBUF;
613 	case DYNPTR_TYPE_SKB:
614 		return BPF_DYNPTR_TYPE_SKB;
615 	case DYNPTR_TYPE_XDP:
616 		return BPF_DYNPTR_TYPE_XDP;
617 	case DYNPTR_TYPE_SKB_META:
618 		return BPF_DYNPTR_TYPE_SKB_META;
619 	case DYNPTR_TYPE_FILE:
620 		return BPF_DYNPTR_TYPE_FILE;
621 	default:
622 		return BPF_DYNPTR_TYPE_INVALID;
623 	}
624 }
625 
626 static enum bpf_type_flag get_dynptr_type_flag(enum bpf_dynptr_type type)
627 {
628 	switch (type) {
629 	case BPF_DYNPTR_TYPE_LOCAL:
630 		return DYNPTR_TYPE_LOCAL;
631 	case BPF_DYNPTR_TYPE_RINGBUF:
632 		return DYNPTR_TYPE_RINGBUF;
633 	case BPF_DYNPTR_TYPE_SKB:
634 		return DYNPTR_TYPE_SKB;
635 	case BPF_DYNPTR_TYPE_XDP:
636 		return DYNPTR_TYPE_XDP;
637 	case BPF_DYNPTR_TYPE_SKB_META:
638 		return DYNPTR_TYPE_SKB_META;
639 	case BPF_DYNPTR_TYPE_FILE:
640 		return DYNPTR_TYPE_FILE;
641 	default:
642 		return 0;
643 	}
644 }
645 
646 static bool dynptr_type_referenced(enum bpf_dynptr_type type)
647 {
648 	return type == BPF_DYNPTR_TYPE_RINGBUF || type == BPF_DYNPTR_TYPE_FILE;
649 }
650 
/* Forward declaration; used by the dynptr marking helpers below. */
static void __mark_dynptr_reg(struct bpf_reg_state *reg,
			      enum bpf_dynptr_type type,
			      bool first_slot, int id, int parent_id);
654 
655 
656 static void mark_dynptr_stack_regs(struct bpf_verifier_env *env,
657 				   struct bpf_reg_state *sreg1,
658 				   struct bpf_reg_state *sreg2,
659 				   enum bpf_dynptr_type type, int parent_id)
660 {
661 	int id = ++env->id_gen;
662 
663 	__mark_dynptr_reg(sreg1, type, true, id, parent_id);
664 	__mark_dynptr_reg(sreg2, type, false, id, parent_id);
665 }
666 
667 static void mark_dynptr_cb_reg(struct bpf_verifier_env *env,
668 			       struct bpf_reg_state *reg,
669 			       enum bpf_dynptr_type type)
670 {
671 	__mark_dynptr_reg(reg, type, true, ++env->id_gen, 0);
672 }
673 
/* Forward declaration; called by mark_stack_slots_dynptr() before its definition. */
static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env,
				        struct bpf_func_state *state, int spi);
676 
677 static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
678 				   enum bpf_arg_type arg_type, int insn_idx,
679 				   struct ref_obj_desc *ref_obj, struct bpf_dynptr_desc *dynptr)
680 {
681 	struct bpf_func_state *state = bpf_func(env, reg);
682 	int spi, i, err, parent_id = 0;
683 	enum bpf_dynptr_type type;
684 
685 	spi = dynptr_get_spi(env, reg);
686 	if (spi < 0)
687 		return spi;
688 
689 	/* We cannot assume both spi and spi - 1 belong to the same dynptr,
690 	 * hence we need to call destroy_if_dynptr_stack_slot twice for both,
691 	 * to ensure that for the following example:
692 	 *	[d1][d1][d2][d2]
693 	 * spi    3   2   1   0
694 	 * So marking spi = 2 should lead to destruction of both d1 and d2. In
695 	 * case they do belong to same dynptr, second call won't see slot_type
696 	 * as STACK_DYNPTR and will simply skip destruction.
697 	 */
698 	err = destroy_if_dynptr_stack_slot(env, state, spi);
699 	if (err)
700 		return err;
701 	err = destroy_if_dynptr_stack_slot(env, state, spi - 1);
702 	if (err)
703 		return err;
704 
705 	for (i = 0; i < BPF_REG_SIZE; i++) {
706 		state->stack[spi].slot_type[i] = STACK_DYNPTR;
707 		state->stack[spi - 1].slot_type[i] = STACK_DYNPTR;
708 	}
709 
710 	type = arg_to_dynptr_type(arg_type);
711 	if (type == BPF_DYNPTR_TYPE_INVALID)
712 		return -EINVAL;
713 
714 	if (dynptr->type == BPF_DYNPTR_TYPE_INVALID) { /* dynptr constructors */
715 		err = validate_ref_obj(env, ref_obj);
716 		if (err)
717 			return err;
718 
719 		/* Track parent's id if the parent is a referenced object */
720 		parent_id = ref_obj->id;
721 
722 		if (dynptr_type_referenced(type)) {
723 			int id;
724 
725 			/*
726 			 * Create an intermediate reference that tracks the referenced
727 			 * object for the referenced dynptr. Freeing a referenced dynptr
728 			 * through helpers/kfuncs will invalidate all clones.
729 			 */
730 			id = acquire_reference(env, insn_idx, parent_id);
731 			if (id < 0)
732 				return id;
733 
734 			parent_id = id;
735 		}
736 	} else { /* bpf_dynptr_clone() */
737 		parent_id = dynptr->parent_id;
738 	}
739 
740 	mark_dynptr_stack_regs(env, &state->stack[spi].spilled_ptr,
741 			       &state->stack[spi - 1].spilled_ptr, type, parent_id);
742 
743 	return 0;
744 }
745 
746 static void invalidate_dynptr(struct bpf_verifier_env *env, struct bpf_stack_state *stack)
747 {
748 	int i;
749 
750 	for (i = 0; i < BPF_REG_SIZE; i++) {
751 		stack[0].slot_type[i] = STACK_INVALID;
752 		stack[1].slot_type[i] = STACK_INVALID;
753 	}
754 
755 	bpf_mark_reg_not_init(env, &stack[0].spilled_ptr);
756 	bpf_mark_reg_not_init(env, &stack[1].spilled_ptr);
757 }
758 
759 static int unmark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
760 {
761 	struct bpf_func_state *state = bpf_func(env, reg);
762 	int spi;
763 
764 	spi = dynptr_get_spi(env, reg);
765 	if (spi < 0)
766 		return spi;
767 
768 	/*
769 	 * For referenced dynptr, release the parent ref which cascades to
770 	 * all clones and derived slices. For non-referenced dynptr, only
771 	 * the dynptr and slices derived from it will be invalidated.
772 	 */
773 	reg = &state->stack[spi].spilled_ptr;
774 	return release_reference(env, dynptr_type_referenced(reg->dynptr.type)
775 				      ? reg->parent_id
776 				      : reg->id);
777 }
778 
/* Forward declaration; called by mark_reg_invalid() before its definition. */
static void __mark_reg_unknown(const struct bpf_verifier_env *env,
			       struct bpf_reg_state *reg);
781 
782 static void mark_reg_invalid(const struct bpf_verifier_env *env, struct bpf_reg_state *reg)
783 {
784 	if (!env->allow_ptr_leaks)
785 		bpf_mark_reg_not_init(env, reg);
786 	else
787 		__mark_reg_unknown(env, reg);
788 }
789 
/*
 * Count the dynptrs in the current verifier state whose first stack slot
 * records @v_parent_id as parent_id.
 *
 * Only entries that have a stack slot whose slot_type[0] is STACK_DYNPTR
 * and that are the first slot of a dynptr are counted, so each dynptr
 * contributes at most once.
 */
static int dynptr_ref_cnt(struct bpf_verifier_env *env, int v_parent_id)
{
	struct bpf_stack_state *stack;
	struct bpf_func_state *state;
	struct bpf_reg_state *reg;
	int ref_cnt = 0;

	bpf_for_each_reg_in_vstate_mask(env->cur_state, state, reg, stack, 1 << STACK_DYNPTR, ({
		/* skip entries without a stack slot and non-dynptr slots */
		if (!stack || stack->slot_type[0] != STACK_DYNPTR)
			continue;
		/* count each dynptr once, via its first slot */
		if (!stack->spilled_ptr.dynptr.first_slot)
			continue;
		if (stack->spilled_ptr.parent_id == v_parent_id)
			ref_cnt++;
	}));

	return ref_cnt;
}
808 
809 static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env,
810 				        struct bpf_func_state *state, int spi)
811 {
812 	int err = 0;
813 
814 	/* We always ensure that STACK_DYNPTR is never set partially,
815 	 * hence just checking for slot_type[0] is enough. This is
816 	 * different for STACK_SPILL, where it may be only set for
817 	 * 1 byte, so code has to use is_spilled_reg.
818 	 */
819 	if (state->stack[spi].slot_type[0] != STACK_DYNPTR)
820 		return 0;
821 
822 	/* Reposition spi to first slot */
823 	if (!state->stack[spi].spilled_ptr.dynptr.first_slot)
824 		spi = spi + 1;
825 
826 	/*
827 	 * A referenced dynptr can be overwritten only if there is at
828 	 * least one other dynptr sharing the same virtual ref parent,
829 	 * ensuring the reference can still be properly released.
830 	 */
831 	if (dynptr_type_referenced(state->stack[spi].spilled_ptr.dynptr.type) &&
832 	    dynptr_ref_cnt(env, state->stack[spi].spilled_ptr.parent_id) <= 1) {
833 		verbose(env, "cannot overwrite referenced dynptr\n");
834 		return -EINVAL;
835 	}
836 
837 	/* Invalidate the dynptr and any derived slices */
838 	err = release_reference(env, state->stack[spi].spilled_ptr.id);
839 	if (!err) {
840 		mark_stack_slot_scratched(env, spi);
841 		mark_stack_slot_scratched(env, spi - 1);
842 	}
843 
844 	return err;
845 }
846 
847 static bool is_dynptr_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
848 {
849 	int spi;
850 
851 	if (reg->type == CONST_PTR_TO_DYNPTR)
852 		return false;
853 
854 	spi = dynptr_get_spi(env, reg);
855 
856 	/* -ERANGE (i.e. spi not falling into allocated stack slots) isn't an
857 	 * error because this just means the stack state hasn't been updated yet.
858 	 * We will do check_mem_access to check and update stack bounds later.
859 	 */
860 	if (spi < 0 && spi != -ERANGE)
861 		return false;
862 
863 	/* We don't need to check if the stack slots are marked by previous
864 	 * dynptr initializations because we allow overwriting existing unreferenced
865 	 * STACK_DYNPTR slots, see mark_stack_slots_dynptr which calls
866 	 * destroy_if_dynptr_stack_slot to ensure dynptr objects at the slots we are
867 	 * touching are completely destructed before we reinitialize them for a new
868 	 * one. For referenced ones, destroy_if_dynptr_stack_slot returns an error early
869 	 * instead of delaying it until the end where the user will get "Unreleased
870 	 * reference" error.
871 	 */
872 	return true;
873 }
874 
875 static bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
876 {
877 	struct bpf_func_state *state = bpf_func(env, reg);
878 	int i, spi;
879 
880 	/* This already represents first slot of initialized bpf_dynptr.
881 	 *
882 	 * CONST_PTR_TO_DYNPTR already has fixed and var_off as 0 due to
883 	 * check_func_arg_reg_off's logic, so we don't need to check its
884 	 * offset and alignment.
885 	 */
886 	if (reg->type == CONST_PTR_TO_DYNPTR)
887 		return true;
888 
889 	spi = dynptr_get_spi(env, reg);
890 	if (spi < 0)
891 		return false;
892 	if (!state->stack[spi].spilled_ptr.dynptr.first_slot)
893 		return false;
894 
895 	for (i = 0; i < BPF_REG_SIZE; i++) {
896 		if (state->stack[spi].slot_type[i] != STACK_DYNPTR ||
897 		    state->stack[spi - 1].slot_type[i] != STACK_DYNPTR)
898 			return false;
899 	}
900 
901 	return true;
902 }
903 
904 static bool is_dynptr_type_expected(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
905 				    enum bpf_arg_type arg_type)
906 {
907 	struct bpf_func_state *state = bpf_func(env, reg);
908 	enum bpf_dynptr_type dynptr_type;
909 	int spi;
910 
911 	/* ARG_PTR_TO_DYNPTR takes any type of dynptr */
912 	if (arg_type == ARG_PTR_TO_DYNPTR)
913 		return true;
914 
915 	dynptr_type = arg_to_dynptr_type(arg_type);
916 	if (reg->type == CONST_PTR_TO_DYNPTR) {
917 		return reg->dynptr.type == dynptr_type;
918 	} else {
919 		spi = dynptr_get_spi(env, reg);
920 		if (spi < 0)
921 			return false;
922 		return state->stack[spi].spilled_ptr.dynptr.type == dynptr_type;
923 	}
924 }
925 
926 static void __mark_reg_known_zero(struct bpf_reg_state *reg);
927 
928 static bool in_rcu_cs(struct bpf_verifier_env *env);
929 
930 static bool is_kfunc_rcu_protected(struct bpf_kfunc_call_arg_meta *meta);
931 
932 static int mark_stack_slots_iter(struct bpf_verifier_env *env,
933 				 struct bpf_kfunc_call_arg_meta *meta,
934 				 struct bpf_reg_state *reg, int insn_idx,
935 				 struct btf *btf, u32 btf_id, int nr_slots)
936 {
937 	struct bpf_func_state *state = bpf_func(env, reg);
938 	int spi, i, j, id;
939 
940 	spi = iter_get_spi(env, reg, nr_slots);
941 	if (spi < 0)
942 		return spi;
943 
944 	id = acquire_reference(env, insn_idx, 0);
945 	if (id < 0)
946 		return id;
947 
948 	for (i = 0; i < nr_slots; i++) {
949 		struct bpf_stack_state *slot = &state->stack[spi - i];
950 		struct bpf_reg_state *st = &slot->spilled_ptr;
951 
952 		__mark_reg_known_zero(st);
953 		st->type = PTR_TO_STACK; /* we don't have dedicated reg type */
954 		if (is_kfunc_rcu_protected(meta)) {
955 			if (in_rcu_cs(env))
956 				st->type |= MEM_RCU;
957 			else
958 				st->type |= PTR_UNTRUSTED;
959 		}
960 		st->id = i == 0 ? id : 0;
961 		st->iter.btf = btf;
962 		st->iter.btf_id = btf_id;
963 		st->iter.state = BPF_ITER_STATE_ACTIVE;
964 		st->iter.depth = 0;
965 
966 		for (j = 0; j < BPF_REG_SIZE; j++)
967 			slot->slot_type[j] = STACK_ITER;
968 
969 		mark_stack_slot_scratched(env, spi - i);
970 	}
971 
972 	return 0;
973 }
974 
975 static int unmark_stack_slots_iter(struct bpf_verifier_env *env,
976 				   struct bpf_reg_state *reg, int nr_slots)
977 {
978 	struct bpf_func_state *state = bpf_func(env, reg);
979 	int spi, i, j;
980 
981 	spi = iter_get_spi(env, reg, nr_slots);
982 	if (spi < 0)
983 		return spi;
984 
985 	for (i = 0; i < nr_slots; i++) {
986 		struct bpf_stack_state *slot = &state->stack[spi - i];
987 		struct bpf_reg_state *st = &slot->spilled_ptr;
988 
989 		if (i == 0)
990 			WARN_ON_ONCE(release_reference(env, st->id));
991 
992 		bpf_mark_reg_not_init(env, st);
993 
994 		for (j = 0; j < BPF_REG_SIZE; j++)
995 			slot->slot_type[j] = STACK_INVALID;
996 
997 		mark_stack_slot_scratched(env, spi - i);
998 	}
999 
1000 	return 0;
1001 }
1002 
1003 static bool is_iter_reg_valid_uninit(struct bpf_verifier_env *env,
1004 				     struct bpf_reg_state *reg, int nr_slots)
1005 {
1006 	struct bpf_func_state *state = bpf_func(env, reg);
1007 	int spi, i, j;
1008 
1009 	/* For -ERANGE (i.e. spi not falling into allocated stack slots), we
1010 	 * will do check_mem_access to check and update stack bounds later, so
1011 	 * return true for that case.
1012 	 */
1013 	spi = iter_get_spi(env, reg, nr_slots);
1014 	if (spi == -ERANGE)
1015 		return true;
1016 	if (spi < 0)
1017 		return false;
1018 
1019 	for (i = 0; i < nr_slots; i++) {
1020 		struct bpf_stack_state *slot = &state->stack[spi - i];
1021 
1022 		for (j = 0; j < BPF_REG_SIZE; j++)
1023 			if (slot->slot_type[j] == STACK_ITER)
1024 				return false;
1025 	}
1026 
1027 	return true;
1028 }
1029 
1030 static int is_iter_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
1031 				   struct btf *btf, u32 btf_id, int nr_slots)
1032 {
1033 	struct bpf_func_state *state = bpf_func(env, reg);
1034 	int spi, i, j;
1035 
1036 	spi = iter_get_spi(env, reg, nr_slots);
1037 	if (spi < 0)
1038 		return -EINVAL;
1039 
1040 	for (i = 0; i < nr_slots; i++) {
1041 		struct bpf_stack_state *slot = &state->stack[spi - i];
1042 		struct bpf_reg_state *st = &slot->spilled_ptr;
1043 
1044 		if (st->type & PTR_UNTRUSTED)
1045 			return -EPROTO;
1046 		/* only main (first) slot has id set */
1047 		if (i == 0 && !st->id)
1048 			return -EINVAL;
1049 		if (i != 0 && st->id)
1050 			return -EINVAL;
1051 		if (st->iter.btf != btf || st->iter.btf_id != btf_id)
1052 			return -EINVAL;
1053 
1054 		for (j = 0; j < BPF_REG_SIZE; j++)
1055 			if (slot->slot_type[j] != STACK_ITER)
1056 				return -EINVAL;
1057 	}
1058 
1059 	return 0;
1060 }
1061 
1062 static int acquire_irq_state(struct bpf_verifier_env *env, int insn_idx);
1063 static int release_irq_state(struct bpf_verifier_state *state, int id);
1064 
1065 static int mark_stack_slot_irq_flag(struct bpf_verifier_env *env,
1066 				     struct bpf_kfunc_call_arg_meta *meta,
1067 				     struct bpf_reg_state *reg, int insn_idx,
1068 				     int kfunc_class)
1069 {
1070 	struct bpf_func_state *state = bpf_func(env, reg);
1071 	struct bpf_stack_state *slot;
1072 	struct bpf_reg_state *st;
1073 	int spi, i, id;
1074 
1075 	spi = irq_flag_get_spi(env, reg);
1076 	if (spi < 0)
1077 		return spi;
1078 
1079 	id = acquire_irq_state(env, insn_idx);
1080 	if (id < 0)
1081 		return id;
1082 
1083 	slot = &state->stack[spi];
1084 	st = &slot->spilled_ptr;
1085 
1086 	__mark_reg_known_zero(st);
1087 	st->type = PTR_TO_STACK; /* we don't have dedicated reg type */
1088 	st->id = id;
1089 	st->irq.kfunc_class = kfunc_class;
1090 
1091 	for (i = 0; i < BPF_REG_SIZE; i++)
1092 		slot->slot_type[i] = STACK_IRQ_FLAG;
1093 
1094 	mark_stack_slot_scratched(env, spi);
1095 	return 0;
1096 }
1097 
1098 static int unmark_stack_slot_irq_flag(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
1099 				      int kfunc_class)
1100 {
1101 	struct bpf_func_state *state = bpf_func(env, reg);
1102 	struct bpf_stack_state *slot;
1103 	struct bpf_reg_state *st;
1104 	int spi, i, err;
1105 
1106 	spi = irq_flag_get_spi(env, reg);
1107 	if (spi < 0)
1108 		return spi;
1109 
1110 	slot = &state->stack[spi];
1111 	st = &slot->spilled_ptr;
1112 
1113 	if (st->irq.kfunc_class != kfunc_class) {
1114 		const char *flag_kfunc = st->irq.kfunc_class == IRQ_NATIVE_KFUNC ? "native" : "lock";
1115 		const char *used_kfunc = kfunc_class == IRQ_NATIVE_KFUNC ? "native" : "lock";
1116 
1117 		verbose(env, "irq flag acquired by %s kfuncs cannot be restored with %s kfuncs\n",
1118 			flag_kfunc, used_kfunc);
1119 		return -EINVAL;
1120 	}
1121 
1122 	err = release_irq_state(env->cur_state, st->id);
1123 	WARN_ON_ONCE(err && err != -EACCES);
1124 	if (err) {
1125 		int insn_idx = 0;
1126 
1127 		for (int i = 0; i < env->cur_state->acquired_refs; i++) {
1128 			if (env->cur_state->refs[i].id == env->cur_state->active_irq_id) {
1129 				insn_idx = env->cur_state->refs[i].insn_idx;
1130 				break;
1131 			}
1132 		}
1133 
1134 		verbose(env, "cannot restore irq state out of order, expected id=%d acquired at insn_idx=%d\n",
1135 			env->cur_state->active_irq_id, insn_idx);
1136 		return err;
1137 	}
1138 
1139 	bpf_mark_reg_not_init(env, st);
1140 
1141 	for (i = 0; i < BPF_REG_SIZE; i++)
1142 		slot->slot_type[i] = STACK_INVALID;
1143 
1144 	mark_stack_slot_scratched(env, spi);
1145 	return 0;
1146 }
1147 
1148 static bool is_irq_flag_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
1149 {
1150 	struct bpf_func_state *state = bpf_func(env, reg);
1151 	struct bpf_stack_state *slot;
1152 	int spi, i;
1153 
1154 	/* For -ERANGE (i.e. spi not falling into allocated stack slots), we
1155 	 * will do check_mem_access to check and update stack bounds later, so
1156 	 * return true for that case.
1157 	 */
1158 	spi = irq_flag_get_spi(env, reg);
1159 	if (spi == -ERANGE)
1160 		return true;
1161 	if (spi < 0)
1162 		return false;
1163 
1164 	slot = &state->stack[spi];
1165 
1166 	for (i = 0; i < BPF_REG_SIZE; i++)
1167 		if (slot->slot_type[i] == STACK_IRQ_FLAG)
1168 			return false;
1169 	return true;
1170 }
1171 
1172 static int is_irq_flag_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
1173 {
1174 	struct bpf_func_state *state = bpf_func(env, reg);
1175 	struct bpf_stack_state *slot;
1176 	struct bpf_reg_state *st;
1177 	int spi, i;
1178 
1179 	spi = irq_flag_get_spi(env, reg);
1180 	if (spi < 0)
1181 		return -EINVAL;
1182 
1183 	slot = &state->stack[spi];
1184 	st = &slot->spilled_ptr;
1185 
1186 	if (!st->id)
1187 		return -EINVAL;
1188 
1189 	for (i = 0; i < BPF_REG_SIZE; i++)
1190 		if (slot->slot_type[i] != STACK_IRQ_FLAG)
1191 			return -EINVAL;
1192 	return 0;
1193 }
1194 
1195 /* Check if given stack slot is "special":
1196  *   - spilled register state (STACK_SPILL);
1197  *   - dynptr state (STACK_DYNPTR);
1198  *   - iter state (STACK_ITER).
1199  *   - irq flag state (STACK_IRQ_FLAG)
1200  */
1201 static bool is_stack_slot_special(const struct bpf_stack_state *stack)
1202 {
1203 	enum bpf_stack_slot_type type = stack->slot_type[BPF_REG_SIZE - 1];
1204 
1205 	switch (type) {
1206 	case STACK_SPILL:
1207 	case STACK_DYNPTR:
1208 	case STACK_ITER:
1209 	case STACK_IRQ_FLAG:
1210 		return true;
1211 	case STACK_INVALID:
1212 	case STACK_POISON:
1213 	case STACK_MISC:
1214 	case STACK_ZERO:
1215 		return false;
1216 	default:
1217 		WARN_ONCE(1, "unknown stack slot type %d\n", type);
1218 		return true;
1219 	}
1220 }
1221 
1222 /* The reg state of a pointer or a bounded scalar was saved when
1223  * it was spilled to the stack.
1224  */
1225 
1226 /*
1227  * Mark stack slot as STACK_MISC, unless it is already:
1228  * - STACK_INVALID, in which case they are equivalent.
1229  * - STACK_ZERO, in which case we preserve more precise STACK_ZERO.
1230  * - STACK_POISON, which truly forbids access to the slot.
1231  * Regardless of allow_ptr_leaks setting (i.e., privileged or unprivileged
1232  * mode), we won't promote STACK_INVALID to STACK_MISC. In privileged case it is
1233  * unnecessary as both are considered equivalent when loading data and pruning,
1234  * in case of unprivileged mode it will be incorrect to allow reads of invalid
1235  * slots.
1236  */
1237 static void mark_stack_slot_misc(struct bpf_verifier_env *env, u8 *stype)
1238 {
1239 	if (*stype == STACK_ZERO)
1240 		return;
1241 	if (*stype == STACK_INVALID || *stype == STACK_POISON)
1242 		return;
1243 	*stype = STACK_MISC;
1244 }
1245 
1246 static void scrub_spilled_slot(u8 *stype)
1247 {
1248 	if (*stype != STACK_INVALID && *stype != STACK_POISON)
1249 		*stype = STACK_MISC;
1250 }
1251 
1252 /* copy array src of length n * size bytes to dst. dst is reallocated if it's too
1253  * small to hold src. This is different from krealloc since we don't want to preserve
1254  * the contents of dst.
1255  *
1256  * Leaves dst untouched if src is NULL or length is zero. Returns NULL if memory could
1257  * not be allocated.
1258  */
1259 static void *copy_array(void *dst, const void *src, size_t n, size_t size, gfp_t flags)
1260 {
1261 	size_t alloc_bytes;
1262 	void *orig = dst;
1263 	size_t bytes;
1264 
1265 	if (ZERO_OR_NULL_PTR(src))
1266 		goto out;
1267 
1268 	if (unlikely(check_mul_overflow(n, size, &bytes)))
1269 		return NULL;
1270 
1271 	alloc_bytes = max(ksize(orig), kmalloc_size_roundup(bytes));
1272 	dst = krealloc(orig, alloc_bytes, flags);
1273 	if (!dst) {
1274 		kfree(orig);
1275 		return NULL;
1276 	}
1277 
1278 	memcpy(dst, src, bytes);
1279 out:
1280 	return dst ? dst : ZERO_SIZE_PTR;
1281 }
1282 
1283 /* resize an array from old_n items to new_n items. the array is reallocated if it's too
1284  * small to hold new_n items. new items are zeroed out if the array grows.
1285  *
1286  * Contrary to krealloc_array, does not free arr if new_n is zero.
1287  */
1288 static void *realloc_array(void *arr, size_t old_n, size_t new_n, size_t size)
1289 {
1290 	size_t alloc_size;
1291 	void *new_arr;
1292 
1293 	if (!new_n || old_n == new_n)
1294 		goto out;
1295 
1296 	alloc_size = kmalloc_size_roundup(size_mul(new_n, size));
1297 	new_arr = krealloc(arr, alloc_size, GFP_KERNEL_ACCOUNT);
1298 	if (!new_arr) {
1299 		kfree(arr);
1300 		return NULL;
1301 	}
1302 	arr = new_arr;
1303 
1304 	if (new_n > old_n)
1305 		memset(arr + old_n * size, 0, (new_n - old_n) * size);
1306 
1307 out:
1308 	return arr ? arr : ZERO_SIZE_PTR;
1309 }
1310 
1311 static int copy_reference_state(struct bpf_verifier_state *dst, const struct bpf_verifier_state *src)
1312 {
1313 	dst->refs = copy_array(dst->refs, src->refs, src->acquired_refs,
1314 			       sizeof(struct bpf_reference_state), GFP_KERNEL_ACCOUNT);
1315 	if (!dst->refs)
1316 		return -ENOMEM;
1317 
1318 	dst->acquired_refs = src->acquired_refs;
1319 	dst->active_locks = src->active_locks;
1320 	dst->active_preempt_locks = src->active_preempt_locks;
1321 	dst->active_rcu_locks = src->active_rcu_locks;
1322 	dst->active_irq_id = src->active_irq_id;
1323 	dst->active_lock_id = src->active_lock_id;
1324 	dst->active_lock_ptr = src->active_lock_ptr;
1325 	return 0;
1326 }
1327 
1328 static int copy_stack_state(struct bpf_func_state *dst, const struct bpf_func_state *src)
1329 {
1330 	size_t n = src->allocated_stack / BPF_REG_SIZE;
1331 
1332 	dst->stack = copy_array(dst->stack, src->stack, n, sizeof(struct bpf_stack_state),
1333 				GFP_KERNEL_ACCOUNT);
1334 	if (!dst->stack)
1335 		return -ENOMEM;
1336 
1337 	dst->allocated_stack = src->allocated_stack;
1338 
1339 	/* copy stack args state */
1340 	n = src->out_stack_arg_cnt;
1341 	if (n) {
1342 		dst->stack_arg_regs = copy_array(dst->stack_arg_regs, src->stack_arg_regs, n,
1343 						 sizeof(struct bpf_reg_state),
1344 						 GFP_KERNEL_ACCOUNT);
1345 		if (!dst->stack_arg_regs)
1346 			return -ENOMEM;
1347 	}
1348 
1349 	dst->out_stack_arg_cnt = src->out_stack_arg_cnt;
1350 	return 0;
1351 }
1352 
1353 static int resize_reference_state(struct bpf_verifier_state *state, size_t n)
1354 {
1355 	state->refs = realloc_array(state->refs, state->acquired_refs, n,
1356 				    sizeof(struct bpf_reference_state));
1357 	if (!state->refs)
1358 		return -ENOMEM;
1359 
1360 	state->acquired_refs = n;
1361 	return 0;
1362 }
1363 
1364 /* Possibly update state->allocated_stack to be at least size bytes. Also
1365  * possibly update the function's high-water mark in its bpf_subprog_info.
1366  */
1367 static int grow_stack_state(struct bpf_verifier_env *env, struct bpf_func_state *state, int size)
1368 {
1369 	size_t old_n = state->allocated_stack / BPF_REG_SIZE, n;
1370 
1371 	/* The stack size is always a multiple of BPF_REG_SIZE. */
1372 	size = round_up(size, BPF_REG_SIZE);
1373 	n = size / BPF_REG_SIZE;
1374 
1375 	if (old_n >= n)
1376 		return 0;
1377 
1378 	state->stack = realloc_array(state->stack, old_n, n, sizeof(struct bpf_stack_state));
1379 	if (!state->stack)
1380 		return -ENOMEM;
1381 
1382 	state->allocated_stack = size;
1383 
1384 	/* update known max for given subprogram */
1385 	if (env->subprog_info[state->subprogno].stack_depth < size)
1386 		env->subprog_info[state->subprogno].stack_depth = size;
1387 
1388 	return 0;
1389 }
1390 
1391 static int grow_stack_arg_slots(struct bpf_verifier_env *env,
1392 				struct bpf_func_state *state, int cnt)
1393 {
1394 	size_t old_n = state->out_stack_arg_cnt;
1395 
1396 	if (old_n >= cnt)
1397 		return 0;
1398 
1399 	state->stack_arg_regs = realloc_array(state->stack_arg_regs, old_n, cnt,
1400 					      sizeof(struct bpf_reg_state));
1401 	if (!state->stack_arg_regs)
1402 		return -ENOMEM;
1403 
1404 	state->out_stack_arg_cnt = cnt;
1405 	return 0;
1406 }
1407 
1408 /* Acquire a pointer id from the env and update the state->refs to include
1409  * this new pointer reference.
1410  * On success, returns a valid pointer id to associate with the register
1411  * On failure, returns a negative errno.
1412  */
1413 static struct bpf_reference_state *acquire_reference_state(struct bpf_verifier_env *env, int insn_idx)
1414 {
1415 	struct bpf_verifier_state *state = env->cur_state;
1416 	int new_ofs = state->acquired_refs;
1417 	int err;
1418 
1419 	err = resize_reference_state(state, state->acquired_refs + 1);
1420 	if (err)
1421 		return NULL;
1422 	state->refs[new_ofs].insn_idx = insn_idx;
1423 
1424 	return &state->refs[new_ofs];
1425 }
1426 
1427 static int acquire_reference(struct bpf_verifier_env *env, int insn_idx, int parent_id)
1428 {
1429 	struct bpf_reference_state *s;
1430 
1431 	s = acquire_reference_state(env, insn_idx);
1432 	if (!s)
1433 		return -ENOMEM;
1434 	s->type = REF_TYPE_PTR;
1435 	s->id = ++env->id_gen;
1436 	s->parent_id = parent_id;
1437 	return s->id;
1438 }
1439 
1440 static int acquire_lock_state(struct bpf_verifier_env *env, int insn_idx, enum ref_state_type type,
1441 			      int id, void *ptr)
1442 {
1443 	struct bpf_verifier_state *state = env->cur_state;
1444 	struct bpf_reference_state *s;
1445 
1446 	s = acquire_reference_state(env, insn_idx);
1447 	if (!s)
1448 		return -ENOMEM;
1449 	s->type = type;
1450 	s->id = id;
1451 	s->ptr = ptr;
1452 
1453 	state->active_locks++;
1454 	state->active_lock_id = id;
1455 	state->active_lock_ptr = ptr;
1456 	return 0;
1457 }
1458 
1459 static int acquire_irq_state(struct bpf_verifier_env *env, int insn_idx)
1460 {
1461 	struct bpf_verifier_state *state = env->cur_state;
1462 	struct bpf_reference_state *s;
1463 
1464 	s = acquire_reference_state(env, insn_idx);
1465 	if (!s)
1466 		return -ENOMEM;
1467 	s->type = REF_TYPE_IRQ;
1468 	s->id = ++env->id_gen;
1469 
1470 	state->active_irq_id = s->id;
1471 	return s->id;
1472 }
1473 
1474 static void release_reference_state(struct bpf_verifier_state *state, int idx)
1475 {
1476 	int last_idx;
1477 	size_t rem;
1478 
1479 	/* IRQ state requires the relative ordering of elements remaining the
1480 	 * same, since it relies on the refs array to behave as a stack, so that
1481 	 * it can detect out-of-order IRQ restore. Hence use memmove to shift
1482 	 * the array instead of swapping the final element into the deleted idx.
1483 	 */
1484 	last_idx = state->acquired_refs - 1;
1485 	rem = state->acquired_refs - idx - 1;
1486 	if (last_idx && idx != last_idx)
1487 		memmove(&state->refs[idx], &state->refs[idx + 1], sizeof(*state->refs) * rem);
1488 	memset(&state->refs[last_idx], 0, sizeof(*state->refs));
1489 	state->acquired_refs--;
1490 	return;
1491 }
1492 
1493 static bool find_reference_state(struct bpf_verifier_state *state, int id)
1494 {
1495 	int i;
1496 
1497 	for (i = 0; i < state->acquired_refs; i++) {
1498 		if (state->refs[i].type != REF_TYPE_PTR)
1499 			continue;
1500 		if (state->refs[i].id == id)
1501 			return true;
1502 	}
1503 
1504 	return false;
1505 }
1506 
1507 static bool reg_is_referenced(struct bpf_verifier_env *env, const struct bpf_reg_state *reg)
1508 {
1509 	return find_reference_state(env->cur_state, reg->id);
1510 }
1511 
1512 static int release_lock_state(struct bpf_verifier_state *state, int type, int id, void *ptr)
1513 {
1514 	void *prev_ptr = NULL;
1515 	u32 prev_id = 0;
1516 	int i;
1517 
1518 	for (i = 0; i < state->acquired_refs; i++) {
1519 		if (state->refs[i].type == type && state->refs[i].id == id &&
1520 		    state->refs[i].ptr == ptr) {
1521 			release_reference_state(state, i);
1522 			state->active_locks--;
1523 			/* Reassign active lock (id, ptr). */
1524 			state->active_lock_id = prev_id;
1525 			state->active_lock_ptr = prev_ptr;
1526 			return 0;
1527 		}
1528 		if (state->refs[i].type & REF_TYPE_LOCK_MASK) {
1529 			prev_id = state->refs[i].id;
1530 			prev_ptr = state->refs[i].ptr;
1531 		}
1532 	}
1533 	return -EINVAL;
1534 }
1535 
1536 static int release_irq_state(struct bpf_verifier_state *state, int id)
1537 {
1538 	u32 prev_id = 0;
1539 	int i;
1540 
1541 	if (id != state->active_irq_id)
1542 		return -EACCES;
1543 
1544 	for (i = 0; i < state->acquired_refs; i++) {
1545 		if (state->refs[i].type != REF_TYPE_IRQ)
1546 			continue;
1547 		if (state->refs[i].id == id) {
1548 			release_reference_state(state, i);
1549 			state->active_irq_id = prev_id;
1550 			return 0;
1551 		} else {
1552 			prev_id = state->refs[i].id;
1553 		}
1554 	}
1555 	return -EINVAL;
1556 }
1557 
1558 static struct bpf_reference_state *find_lock_state(struct bpf_verifier_state *state, enum ref_state_type type,
1559 						   int id, void *ptr)
1560 {
1561 	int i;
1562 
1563 	for (i = 0; i < state->acquired_refs; i++) {
1564 		struct bpf_reference_state *s = &state->refs[i];
1565 
1566 		if (!(s->type & type))
1567 			continue;
1568 
1569 		if (s->id == id && s->ptr == ptr)
1570 			return s;
1571 	}
1572 	return NULL;
1573 }
1574 
1575 static void free_func_state(struct bpf_func_state *state)
1576 {
1577 	if (!state)
1578 		return;
1579 	kfree(state->stack_arg_regs);
1580 	kfree(state->stack);
1581 	kfree(state);
1582 }
1583 
1584 void bpf_clear_jmp_history(struct bpf_verifier_state *state)
1585 {
1586 	kfree(state->jmp_history);
1587 	state->jmp_history = NULL;
1588 	state->jmp_history_cnt = 0;
1589 }
1590 
1591 void bpf_free_verifier_state(struct bpf_verifier_state *state,
1592 			    bool free_self)
1593 {
1594 	int i;
1595 
1596 	for (i = 0; i <= state->curframe; i++) {
1597 		free_func_state(state->frame[i]);
1598 		state->frame[i] = NULL;
1599 	}
1600 	kfree(state->refs);
1601 	bpf_clear_jmp_history(state);
1602 	if (free_self)
1603 		kfree(state);
1604 }
1605 
1606 /* copy verifier state from src to dst growing dst stack space
1607  * when necessary to accommodate larger src stack
1608  */
1609 static int copy_func_state(struct bpf_func_state *dst,
1610 			   const struct bpf_func_state *src)
1611 {
1612 	memcpy(dst, src, offsetof(struct bpf_func_state, stack));
1613 	return copy_stack_state(dst, src);
1614 }
1615 
/* Copy verifier state @src into @dst_state.
 *
 * Copies the jump history, the reference state, the scalar bookkeeping
 * fields and every frame 0..src->curframe. Frames missing in @dst_state are
 * allocated; frames of @dst_state above src->curframe are freed.
 *
 * Returns 0 on success or a negative error (-ENOMEM from the allocations
 * visible here). On failure @dst_state may have been partially updated.
 */
int bpf_copy_verifier_state(struct bpf_verifier_state *dst_state,
			   const struct bpf_verifier_state *src)
{
	struct bpf_func_state *dst;
	int i, err;

	dst_state->jmp_history = copy_array(dst_state->jmp_history, src->jmp_history,
					  src->jmp_history_cnt, sizeof(*dst_state->jmp_history),
					  GFP_KERNEL_ACCOUNT);
	if (!dst_state->jmp_history)
		return -ENOMEM;
	dst_state->jmp_history_cnt = src->jmp_history_cnt;

	/* if dst has more stack frames then src frame, free them, this is also
	 * necessary in case of exceptional exits using bpf_throw.
	 *
	 * This has to run before dst_state->curframe is overwritten below.
	 */
	for (i = src->curframe + 1; i <= dst_state->curframe; i++) {
		free_func_state(dst_state->frame[i]);
		dst_state->frame[i] = NULL;
	}
	err = copy_reference_state(dst_state, src);
	if (err)
		return err;
	dst_state->speculative = src->speculative;
	dst_state->in_sleepable = src->in_sleepable;
	dst_state->curframe = src->curframe;
	dst_state->branches = src->branches;
	dst_state->parent = src->parent;
	dst_state->first_insn_idx = src->first_insn_idx;
	dst_state->last_insn_idx = src->last_insn_idx;
	dst_state->dfs_depth = src->dfs_depth;
	dst_state->callback_unroll_depth = src->callback_unroll_depth;
	dst_state->may_goto_depth = src->may_goto_depth;
	dst_state->equal_state = src->equal_state;
	/* copy each frame, allocating the destination frame when missing */
	for (i = 0; i <= src->curframe; i++) {
		dst = dst_state->frame[i];
		if (!dst) {
			dst = kzalloc_obj(*dst, GFP_KERNEL_ACCOUNT);
			if (!dst)
				return -ENOMEM;
			dst_state->frame[i] = dst;
		}
		err = copy_func_state(dst, src->frame[i]);
		if (err)
			return err;
	}
	return 0;
}
1664 
1665 static u32 state_htab_size(struct bpf_verifier_env *env)
1666 {
1667 	return env->prog->len;
1668 }
1669 
1670 struct list_head *bpf_explored_state(struct bpf_verifier_env *env, int idx)
1671 {
1672 	struct bpf_verifier_state *cur = env->cur_state;
1673 	struct bpf_func_state *state = cur->frame[cur->curframe];
1674 
1675 	return &env->explored_states[(idx ^ state->callsite) % state_htab_size(env)];
1676 }
1677 
1678 static bool same_callsites(struct bpf_verifier_state *a, struct bpf_verifier_state *b)
1679 {
1680 	int fr;
1681 
1682 	if (a->curframe != b->curframe)
1683 		return false;
1684 
1685 	for (fr = a->curframe; fr >= 0; fr--)
1686 		if (a->frame[fr]->callsite != b->frame[fr]->callsite)
1687 			return false;
1688 
1689 	return true;
1690 }
1691 
1692 
1693 void bpf_free_backedges(struct bpf_scc_visit *visit)
1694 {
1695 	struct bpf_scc_backedge *backedge, *next;
1696 
1697 	for (backedge = visit->backedges; backedge; backedge = next) {
1698 		bpf_free_verifier_state(&backedge->state, false);
1699 		next = backedge->next;
1700 		kfree(backedge);
1701 	}
1702 	visit->backedges = NULL;
1703 }
1704 
1705 static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
1706 		     int *insn_idx, bool pop_log)
1707 {
1708 	struct bpf_verifier_state *cur = env->cur_state;
1709 	struct bpf_verifier_stack_elem *elem, *head = env->head;
1710 	int err;
1711 
1712 	if (env->head == NULL)
1713 		return -ENOENT;
1714 
1715 	if (cur) {
1716 		err = bpf_copy_verifier_state(cur, &head->st);
1717 		if (err)
1718 			return err;
1719 	}
1720 	if (pop_log)
1721 		bpf_vlog_reset(&env->log, head->log_pos);
1722 	if (insn_idx)
1723 		*insn_idx = head->insn_idx;
1724 	if (prev_insn_idx)
1725 		*prev_insn_idx = head->prev_insn_idx;
1726 	elem = head->next;
1727 	bpf_free_verifier_state(&head->st, false);
1728 	kfree(head);
1729 	env->head = elem;
1730 	env->stack_size--;
1731 	return 0;
1732 }
1733 
/* Should only return true for non-fatal errors that are allowed to
 * occur during speculative verification. For these we can insert a
 * nospec and the program might still be accepted. Do not include
 * something like ENOMEM because it is likely to re-occur for the next
 * architectural path once it has been recovered-from in all speculative
 * paths.
 */
static bool error_recoverable_with_nospec(int err)
{
	switch (err) {
	case -EPERM:
	case -EACCES:
	case -EINVAL:
		return true;
	default:
		return false;
	}
}
1745 
1746 static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
1747 					     int insn_idx, int prev_insn_idx,
1748 					     bool speculative)
1749 {
1750 	struct bpf_verifier_state *cur = env->cur_state;
1751 	struct bpf_verifier_stack_elem *elem;
1752 	int err;
1753 
1754 	elem = kzalloc_obj(struct bpf_verifier_stack_elem, GFP_KERNEL_ACCOUNT);
1755 	if (!elem)
1756 		return ERR_PTR(-ENOMEM);
1757 
1758 	elem->insn_idx = insn_idx;
1759 	elem->prev_insn_idx = prev_insn_idx;
1760 	elem->next = env->head;
1761 	elem->log_pos = env->log.end_pos;
1762 	env->head = elem;
1763 	env->stack_size++;
1764 	err = bpf_copy_verifier_state(&elem->st, cur);
1765 	if (err)
1766 		return ERR_PTR(-ENOMEM);
1767 	elem->st.speculative |= speculative;
1768 	if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
1769 		verbose(env, "The sequence of %d jumps is too complex.\n",
1770 			env->stack_size);
1771 		return ERR_PTR(-E2BIG);
1772 	}
1773 	if (elem->st.parent) {
1774 		++elem->st.parent->branches;
1775 		/* WARN_ON(branches > 2) technically makes sense here,
1776 		 * but
1777 		 * 1. speculative states will bump 'branches' for non-branch
1778 		 * instructions
1779 		 * 2. is_state_visited() heuristics may decide not to create
1780 		 * a new state for a sequence of branches and all such current
1781 		 * and cloned states will be pointing to a single parent state
1782 		 * which might have large 'branches' count.
1783 		 */
1784 	}
1785 	return &elem->st;
1786 }
1787 
1788 static const char *reg_arg_name(struct bpf_verifier_env *env, argno_t argno)
1789 {
1790 	char *buf = env->tmp_arg_name;
1791 	int len = sizeof(env->tmp_arg_name);
1792 	int arg, regno = reg_from_argno(argno);
1793 
1794 	if (regno >= 0) {
1795 		snprintf(buf, len, "R%d", regno);
1796 	} else {
1797 		arg = arg_from_argno(argno);
1798 		snprintf(buf, len, "*(R11-%u)", (arg - MAX_BPF_FUNC_REG_ARGS) * BPF_REG_SIZE);
1799 	}
1800 
1801 	return buf;
1802 }
1803 
1804 static const int caller_saved[CALLER_SAVED_REGS] = {
1805 	BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
1806 };
1807 
1808 /* This helper doesn't clear reg->id */
1809 static void ___mark_reg_known(struct bpf_reg_state *reg, u64 imm)
1810 {
1811 	reg->var_off = tnum_const(imm);
1812 	reg->r64 = cnum64_from_urange(imm, imm);
1813 	reg->r32 = cnum32_from_urange((u32)imm, (u32)imm);
1814 }
1815 
1816 /* Mark the unknown part of a register (variable offset or scalar value) as
1817  * known to have the value @imm.
1818  */
1819 static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm)
1820 {
1821 	/* Clear off and union(map_ptr, range) */
1822 	memset(((u8 *)reg) + sizeof(reg->type), 0,
1823 	       offsetof(struct bpf_reg_state, var_off) - sizeof(reg->type));
1824 	reg->id = 0;
1825 	reg->parent_id = 0;
1826 	___mark_reg_known(reg, imm);
1827 }
1828 
1829 static void __mark_reg32_known(struct bpf_reg_state *reg, u64 imm)
1830 {
1831 	reg->var_off = tnum_const_subreg(reg->var_off, imm);
1832 	reg->r32 = cnum32_from_urange((u32)imm, (u32)imm);
1833 }
1834 
1835 /* Mark the 'variable offset' part of a register as zero.  This should be
1836  * used only on registers holding a pointer type.
1837  */
1838 static void __mark_reg_known_zero(struct bpf_reg_state *reg)
1839 {
1840 	__mark_reg_known(reg, 0);
1841 }
1842 
1843 static void __mark_reg_const_zero(const struct bpf_verifier_env *env, struct bpf_reg_state *reg)
1844 {
1845 	__mark_reg_known(reg, 0);
1846 	reg->type = SCALAR_VALUE;
1847 	/* all scalars are assumed imprecise initially (unless unprivileged,
1848 	 * in which case everything is forced to be precise)
1849 	 */
1850 	reg->precise = !env->bpf_capable;
1851 }
1852 
1853 static void mark_reg_known_zero(struct bpf_verifier_env *env,
1854 				struct bpf_reg_state *regs, u32 regno)
1855 {
1856 	__mark_reg_known_zero(regs + regno);
1857 }
1858 
1859 static void __mark_dynptr_reg(struct bpf_reg_state *reg, enum bpf_dynptr_type type,
1860 			      bool first_slot, int id, int parent_id)
1861 {
1862 	/* reg->type has no meaning for STACK_DYNPTR, but when we set reg for
1863 	 * callback arguments, it does need to be CONST_PTR_TO_DYNPTR, so simply
1864 	 * set it unconditionally as it is ignored for STACK_DYNPTR anyway.
1865 	 */
1866 	__mark_reg_known_zero(reg);
1867 	reg->type = CONST_PTR_TO_DYNPTR;
1868 	/* Give each dynptr a unique id to uniquely associate slices to it. */
1869 	reg->id = id;
1870 	reg->parent_id = parent_id;
1871 	reg->dynptr.type = type;
1872 	reg->dynptr.first_slot = first_slot;
1873 }
1874 
1875 static void mark_ptr_not_null_reg(struct bpf_reg_state *reg)
1876 {
1877 	if (base_type(reg->type) == PTR_TO_MAP_VALUE) {
1878 		const struct bpf_map *map = reg->map_ptr;
1879 
1880 		if (map->inner_map_meta) {
1881 			reg->type = CONST_PTR_TO_MAP;
1882 			reg->map_ptr = map->inner_map_meta;
1883 			/* transfer reg's id which is unique for every map_lookup_elem
1884 			 * as UID of the inner map.
1885 			 */
1886 			if (btf_record_has_field(map->inner_map_meta->record,
1887 						 BPF_TIMER | BPF_WORKQUEUE | BPF_TASK_WORK)) {
1888 				reg->map_uid = reg->id;
1889 			}
1890 		} else if (map->map_type == BPF_MAP_TYPE_XSKMAP) {
1891 			reg->type = PTR_TO_XDP_SOCK;
1892 		} else if (map->map_type == BPF_MAP_TYPE_SOCKMAP ||
1893 			   map->map_type == BPF_MAP_TYPE_SOCKHASH) {
1894 			reg->type = PTR_TO_SOCKET;
1895 		} else {
1896 			reg->type = PTR_TO_MAP_VALUE;
1897 		}
1898 		return;
1899 	}
1900 
1901 	reg->type &= ~PTR_MAYBE_NULL;
1902 }
1903 
1904 static void mark_reg_graph_node(struct bpf_reg_state *regs, u32 regno,
1905 				struct btf_field_graph_root *ds_head)
1906 {
1907 	__mark_reg_known(&regs[regno], ds_head->node_offset);
1908 	regs[regno].type = PTR_TO_BTF_ID | MEM_ALLOC;
1909 	regs[regno].btf = ds_head->btf;
1910 	regs[regno].btf_id = ds_head->value_btf_id;
1911 }
1912 
1913 static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg)
1914 {
1915 	return type_is_pkt_pointer(reg->type);
1916 }
1917 
1918 static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg)
1919 {
1920 	return reg_is_pkt_pointer(reg) ||
1921 	       reg->type == PTR_TO_PACKET_END;
1922 }
1923 
1924 static bool reg_is_dynptr_slice_pkt(const struct bpf_reg_state *reg)
1925 {
1926 	return base_type(reg->type) == PTR_TO_MEM &&
1927 	       (reg->type &
1928 		(DYNPTR_TYPE_SKB | DYNPTR_TYPE_XDP | DYNPTR_TYPE_SKB_META));
1929 }
1930 
1931 /* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. */
1932 static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg,
1933 				    enum bpf_reg_type which)
1934 {
1935 	/* The register can already have a range from prior markings.
1936 	 * This is fine as long as it hasn't been advanced from its
1937 	 * origin.
1938 	 */
1939 	return reg->type == which &&
1940 	       reg->id == 0 &&
1941 	       tnum_equals_const(reg->var_off, 0);
1942 }
1943 
1944 static void __mark_reg32_unbounded(struct bpf_reg_state *reg)
1945 {
1946 	reg->r32 = CNUM32_UNBOUNDED;
1947 }
1948 
1949 static void __mark_reg64_unbounded(struct bpf_reg_state *reg)
1950 {
1951 	reg->r64 = CNUM64_UNBOUNDED;
1952 }
1953 
/* Reset both the 32-bit and the 64-bit range of @reg to "unbounded".
 * var_off is left untouched.
 */
static void __mark_reg_unbounded(struct bpf_reg_state *reg)
{
	__mark_reg32_unbounded(reg);
	__mark_reg64_unbounded(reg);
}
1960 
1961 static void reset_reg64_and_tnum(struct bpf_reg_state *reg)
1962 {
1963 	__mark_reg64_unbounded(reg);
1964 	reg->var_off = tnum_unknown;
1965 }
1966 
1967 static void reset_reg32_and_tnum(struct bpf_reg_state *reg)
1968 {
1969 	__mark_reg32_unbounded(reg);
1970 	reg->var_off = tnum_unknown;
1971 }
1972 
1973 static struct cnum32 cnum32_from_tnum(struct tnum tnum)
1974 {
1975 	tnum = tnum_subreg(tnum);
1976 	if ((tnum.mask & S32_MIN) || (tnum.value & S32_MIN))
1977 		/* min signed is max(sign bit) | min(other bits) */
1978 		/* max signed is min(sign bit) | max(other bits) */
1979 		return cnum32_from_srange(tnum.value | (tnum.mask & S32_MIN),
1980 					  tnum.value | (tnum.mask & S32_MAX));
1981 	else
1982 		return cnum32_from_urange(tnum.value, (tnum.value | tnum.mask));
1983 }
1984 
1985 static struct cnum64 cnum64_from_tnum(struct tnum tnum)
1986 {
1987 	if ((tnum.mask & S64_MIN) || (tnum.value & S64_MIN))
1988 		/* min signed is max(sign bit) | min(other bits) */
1989 		/* max signed is min(sign bit) | max(other bits) */
1990 		return cnum64_from_srange(tnum.value | (tnum.mask & S64_MIN),
1991 					  tnum.value | (tnum.mask & S64_MAX));
1992 	else
1993 		return cnum64_from_urange(tnum.value, (tnum.value | tnum.mask));
1994 }
1995 
1996 static void __update_reg32_bounds(struct bpf_reg_state *reg)
1997 {
1998 	cnum32_intersect_with(&reg->r32, cnum32_from_tnum(reg->var_off));
1999 }
2000 
2001 static void __update_reg64_bounds(struct bpf_reg_state *reg)
2002 {
2003 	u64 tnum_next, tmax;
2004 	bool umin_in_tnum;
2005 
2006 	cnum64_intersect_with(&reg->r64, cnum64_from_tnum(reg->var_off));
2007 
2008 	/* Check if u64 and tnum overlap in a single value */
2009 	tnum_next = tnum_step(reg->var_off, reg_umin(reg));
2010 	umin_in_tnum = (reg_umin(reg) & ~reg->var_off.mask) == reg->var_off.value;
2011 	tmax = reg->var_off.value | reg->var_off.mask;
2012 	if (umin_in_tnum && tnum_next > reg_umax(reg)) {
2013 		/* The u64 range and the tnum only overlap in umin.
2014 		 * u64:  ---[xxxxxx]-----
2015 		 * tnum: --xx----------x-
2016 		 */
2017 		___mark_reg_known(reg, reg_umin(reg));
2018 	} else if (!umin_in_tnum && tnum_next == tmax) {
2019 		/* The u64 range and the tnum only overlap in the maximum value
2020 		 * represented by the tnum, called tmax.
2021 		 * u64:  ---[xxxxxx]-----
2022 		 * tnum: xx-----x--------
2023 		 */
2024 		___mark_reg_known(reg, tmax);
2025 	} else if (!umin_in_tnum && tnum_next <= reg_umax(reg) &&
2026 		   tnum_step(reg->var_off, tnum_next) > reg_umax(reg)) {
2027 		/* The u64 range and the tnum only overlap in between umin
2028 		 * (excluded) and umax.
2029 		 * u64:  ---[xxxxxx]-----
2030 		 * tnum: xx----x-------x-
2031 		 */
2032 		___mark_reg_known(reg, tnum_next);
2033 	}
2034 }
2035 
/* Refresh both ranges of @reg from var_off: the 32-bit range first, then
 * the 64-bit one.
 */
static void __update_reg_bounds(struct bpf_reg_state *reg)
{
	__update_reg32_bounds(reg);

	__update_reg64_bounds(reg);
}
2041 
2042 static void deduce_bounds_32_from_64(struct bpf_reg_state *reg)
2043 {
2044 	cnum32_intersect_with(&reg->r32, cnum32_from_cnum64(reg->r64));
2045 }
2046 
2047 static void deduce_bounds_64_from_32(struct bpf_reg_state *reg)
2048 {
2049 	reg->r64 = cnum64_cnum32_intersect(reg->r64, reg->r32);
2050 }
2051 
/* Propagate range knowledge between the two widths of @reg: first from the
 * 64-bit range into the 32-bit one, then back from 32-bit into 64-bit.
 */
static void __reg_deduce_bounds(struct bpf_reg_state *reg)
{
	deduce_bounds_32_from_64(reg);

	deduce_bounds_64_from_32(reg);
}
2057 
2058 /* Attempts to improve var_off based on unsigned min/max information */
2059 static void __reg_bound_offset(struct bpf_reg_state *reg)
2060 {
2061 	struct tnum var64_off = tnum_intersect(reg->var_off,
2062 					       tnum_range(reg_umin(reg),
2063 							  reg_umax(reg)));
2064 	struct tnum var32_off = tnum_intersect(tnum_subreg(var64_off),
2065 					       tnum_range(reg_u32_min(reg),
2066 							  reg_u32_max(reg)));
2067 
2068 	reg->var_off = tnum_or(tnum_clear_subreg(var64_off), var32_off);
2069 }
2070 
2071 static bool range_bounds_violation(struct bpf_reg_state *reg);
2072 
/* Bring the ranges and var_off of @reg in sync with each other.
 *
 * Does nothing when either range of @reg is already empty.
 */
static void reg_bounds_sync(struct bpf_reg_state *reg)
{
	int round;

	/* If the input reg_state is invalid, we can exit early */
	if (range_bounds_violation(reg))
		return;

	/* We might have learned new bounds from the var_off. */
	__update_reg_bounds(reg);

	/* We might have learned something about the sign bit; the
	 * deduction is run twice in a row.
	 */
	for (round = 0; round < 2; round++)
		__reg_deduce_bounds(reg);

	/* We might have learned some bits from the bounds. */
	__reg_bound_offset(reg);

	/* Intersecting with the old var_off might have improved our bounds
	 * slightly, e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
	 * then new var_off is (0; 0x7f...fc) which improves our umax.
	 */
	__update_reg_bounds(reg);
}
2091 
2092 static bool const_tnum_range_mismatch(struct bpf_reg_state *reg)
2093 {
2094 	if (!tnum_is_const(reg->var_off))
2095 		return false;
2096 
2097 	return !cnum64_is_const(reg->r64) || reg->r64.base != reg->var_off.value;
2098 }
2099 
2100 static bool const_tnum_range_mismatch_32(struct bpf_reg_state *reg)
2101 {
2102 	if (!tnum_subreg_is_const(reg->var_off))
2103 		return false;
2104 
2105 	return !cnum32_is_const(reg->r32) || reg->r32.base != tnum_subreg(reg->var_off).value;
2106 }
2107 
2108 static bool range_bounds_violation(struct bpf_reg_state *reg)
2109 {
2110 	return cnum32_is_empty(reg->r32) || cnum64_is_empty(reg->r64);
2111 }
2112 
2113 static int reg_bounds_sanity_check(struct bpf_verifier_env *env,
2114 				   struct bpf_reg_state *reg, const char *ctx)
2115 {
2116 	const char *msg;
2117 
2118 	if (range_bounds_violation(reg)) {
2119 		msg = "range bounds violation";
2120 		goto out;
2121 	}
2122 
2123 	if (const_tnum_range_mismatch(reg)) {
2124 		msg = "const tnum out of sync with range bounds";
2125 		goto out;
2126 	}
2127 
2128 	if (const_tnum_range_mismatch_32(reg)) {
2129 		msg = "const subreg tnum out of sync with range bounds";
2130 		goto out;
2131 	}
2132 
2133 	return 0;
2134 out:
2135 	verifier_bug(env, "REG INVARIANTS VIOLATION (%s): %s r64={.base=%#llx, .size=%#llx} "
2136 		     "r32={.base=%#x, .size=%#x} var_off=(%#llx, %#llx)",
2137 		     ctx, msg,
2138 		     reg->r64.base, reg->r64.size,
2139 		     reg->r32.base, reg->r32.size,
2140 		     reg->var_off.value, reg->var_off.mask);
2141 	if (env->test_reg_invariants)
2142 		return -EFAULT;
2143 	__mark_reg_unbounded(reg);
2144 	return 0;
2145 }
2146 
2147 /* Mark a register as having a completely unknown (scalar) value. */
2148 void bpf_mark_reg_unknown_imprecise(struct bpf_reg_state *reg)
2149 {
2150 	s32 subreg_def = reg->subreg_def;
2151 
2152 	memset(reg, 0, sizeof(*reg));
2153 	reg->type = SCALAR_VALUE;
2154 	reg->var_off = tnum_unknown;
2155 	reg->subreg_def = subreg_def;
2156 	__mark_reg_unbounded(reg);
2157 }
2158 
2159 /* Mark a register as having a completely unknown (scalar) value,
2160  * initialize .precise as true when not bpf capable.
2161  */
2162 static void __mark_reg_unknown(const struct bpf_verifier_env *env,
2163 			       struct bpf_reg_state *reg)
2164 {
2165 	bpf_mark_reg_unknown_imprecise(reg);
2166 	reg->precise = !env->bpf_capable;
2167 }
2168 
2169 static void mark_reg_unknown(struct bpf_verifier_env *env,
2170 			     struct bpf_reg_state *regs, u32 regno)
2171 {
2172 	__mark_reg_unknown(env, regs + regno);
2173 }
2174 
2175 static int __mark_reg_s32_range(struct bpf_verifier_env *env,
2176 				struct bpf_reg_state *regs,
2177 				u32 regno,
2178 				s32 s32_min,
2179 				s32 s32_max)
2180 {
2181 	struct bpf_reg_state *reg = regs + regno;
2182 
2183 	reg_set_srange32(reg,
2184 			 max_t(s32, reg_s32_min(reg), s32_min),
2185 			 min_t(s32, reg_s32_max(reg), s32_max));
2186 	reg_set_srange64(reg,
2187 			 max_t(s64, reg_smin(reg), s32_min),
2188 			 min_t(s64, reg_smax(reg), s32_max));
2189 
2190 	reg_bounds_sync(reg);
2191 
2192 	return reg_bounds_sanity_check(env, reg, "s32_range");
2193 }
2194 
2195 void bpf_mark_reg_not_init(const struct bpf_verifier_env *env,
2196 			   struct bpf_reg_state *reg)
2197 {
2198 	__mark_reg_unknown(env, reg);
2199 	reg->type = NOT_INIT;
2200 }
2201 
2202 static int mark_btf_ld_reg(struct bpf_verifier_env *env,
2203 			   struct bpf_reg_state *regs, u32 regno,
2204 			   enum bpf_reg_type reg_type,
2205 			   struct btf *btf, u32 btf_id,
2206 			   enum bpf_type_flag flag)
2207 {
2208 	switch (reg_type) {
2209 	case SCALAR_VALUE:
2210 		mark_reg_unknown(env, regs, regno);
2211 		return 0;
2212 	case PTR_TO_BTF_ID:
2213 		mark_reg_known_zero(env, regs, regno);
2214 		regs[regno].type = PTR_TO_BTF_ID | flag;
2215 		regs[regno].btf = btf;
2216 		regs[regno].btf_id = btf_id;
2217 		if (type_may_be_null(flag))
2218 			regs[regno].id = ++env->id_gen;
2219 		return 0;
2220 	case PTR_TO_MEM:
2221 		mark_reg_known_zero(env, regs, regno);
2222 		regs[regno].type = PTR_TO_MEM | flag;
2223 		regs[regno].mem_size = 0;
2224 		return 0;
2225 	default:
2226 		verifier_bug(env, "unexpected reg_type %d in %s\n", reg_type, __func__);
2227 		return -EFAULT;
2228 	}
2229 }
2230 
2231 #define DEF_NOT_SUBREG	(0)
2232 static void init_reg_state(struct bpf_verifier_env *env,
2233 			   struct bpf_func_state *state)
2234 {
2235 	struct bpf_reg_state *regs = state->regs;
2236 	int i;
2237 
2238 	for (i = 0; i < MAX_BPF_REG; i++) {
2239 		bpf_mark_reg_not_init(env, &regs[i]);
2240 		regs[i].subreg_def = DEF_NOT_SUBREG;
2241 	}
2242 
2243 	/* frame pointer */
2244 	regs[BPF_REG_FP].type = PTR_TO_STACK;
2245 	mark_reg_known_zero(env, regs, BPF_REG_FP);
2246 	regs[BPF_REG_FP].frameno = state->frameno;
2247 }
2248 
2249 static struct bpf_retval_range retval_range(s32 minval, s32 maxval)
2250 {
2251 	/*
2252 	 * return_32bit is set to false by default and set explicitly
2253 	 * by the caller when necessary.
2254 	 */
2255 	return (struct bpf_retval_range){ minval, maxval, false };
2256 }
2257 
2258 static void init_func_state(struct bpf_verifier_env *env,
2259 			    struct bpf_func_state *state,
2260 			    int callsite, int frameno, int subprogno)
2261 {
2262 	state->callsite = callsite;
2263 	state->frameno = frameno;
2264 	state->subprogno = subprogno;
2265 	state->callback_ret_range = retval_range(0, 0);
2266 	init_reg_state(env, state);
2267 	mark_verifier_state_scratched(env);
2268 }
2269 
2270 /* Similar to push_stack(), but for async callbacks */
2271 static struct bpf_verifier_state *push_async_cb(struct bpf_verifier_env *env,
2272 						int insn_idx, int prev_insn_idx,
2273 						int subprog, bool is_sleepable)
2274 {
2275 	struct bpf_verifier_stack_elem *elem;
2276 	struct bpf_func_state *frame;
2277 
2278 	elem = kzalloc_obj(struct bpf_verifier_stack_elem, GFP_KERNEL_ACCOUNT);
2279 	if (!elem)
2280 		return ERR_PTR(-ENOMEM);
2281 
2282 	elem->insn_idx = insn_idx;
2283 	elem->prev_insn_idx = prev_insn_idx;
2284 	elem->next = env->head;
2285 	elem->log_pos = env->log.end_pos;
2286 	env->head = elem;
2287 	env->stack_size++;
2288 	if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
2289 		verbose(env,
2290 			"The sequence of %d jumps is too complex for async cb.\n",
2291 			env->stack_size);
2292 		return ERR_PTR(-E2BIG);
2293 	}
2294 	/* Unlike push_stack() do not bpf_copy_verifier_state().
2295 	 * The caller state doesn't matter.
2296 	 * This is async callback. It starts in a fresh stack.
2297 	 * Initialize it similar to do_check_common().
2298 	 */
2299 	elem->st.branches = 1;
2300 	elem->st.in_sleepable = is_sleepable;
2301 	frame = kzalloc_obj(*frame, GFP_KERNEL_ACCOUNT);
2302 	if (!frame)
2303 		return ERR_PTR(-ENOMEM);
2304 	init_func_state(env, frame,
2305 			BPF_MAIN_FUNC /* callsite */,
2306 			0 /* frameno within this callchain */,
2307 			subprog /* subprog number within this prog */);
2308 	elem->st.frame[0] = frame;
2309 	return &elem->st;
2310 }
2311 
2312 
2313 static int cmp_subprogs(const void *a, const void *b)
2314 {
2315 	return ((struct bpf_subprog_info *)a)->start -
2316 	       ((struct bpf_subprog_info *)b)->start;
2317 }
2318 
2319 /* Find subprogram that contains instruction at 'off' */
2320 struct bpf_subprog_info *bpf_find_containing_subprog(struct bpf_verifier_env *env, int off)
2321 {
2322 	struct bpf_subprog_info *vals = env->subprog_info;
2323 	int l, r, m;
2324 
2325 	if (off >= env->prog->len || off < 0 || env->subprog_cnt == 0)
2326 		return NULL;
2327 
2328 	l = 0;
2329 	r = env->subprog_cnt - 1;
2330 	while (l < r) {
2331 		m = l + (r - l + 1) / 2;
2332 		if (vals[m].start <= off)
2333 			l = m;
2334 		else
2335 			r = m - 1;
2336 	}
2337 	return &vals[l];
2338 }
2339 
2340 /* Find subprogram that starts exactly at 'off' */
2341 int bpf_find_subprog(struct bpf_verifier_env *env, int off)
2342 {
2343 	struct bpf_subprog_info *p;
2344 
2345 	p = bpf_find_containing_subprog(env, off);
2346 	if (!p || p->start != off)
2347 		return -ENOENT;
2348 	return p - env->subprog_info;
2349 }
2350 
2351 static int add_subprog(struct bpf_verifier_env *env, int off)
2352 {
2353 	int insn_cnt = env->prog->len;
2354 	int ret;
2355 
2356 	if (off >= insn_cnt || off < 0) {
2357 		verbose(env, "call to invalid destination\n");
2358 		return -EINVAL;
2359 	}
2360 	ret = bpf_find_subprog(env, off);
2361 	if (ret >= 0)
2362 		return ret;
2363 	if (env->subprog_cnt >= BPF_MAX_SUBPROGS) {
2364 		verbose(env, "too many subprograms\n");
2365 		return -E2BIG;
2366 	}
2367 	/* determine subprog starts. The end is one before the next starts */
2368 	env->subprog_info[env->subprog_cnt++].start = off;
2369 	sort(env->subprog_info, env->subprog_cnt,
2370 	     sizeof(env->subprog_info[0]), cmp_subprogs, NULL);
2371 	return env->subprog_cnt - 1;
2372 }
2373 
2374 static int bpf_find_exception_callback_insn_off(struct bpf_verifier_env *env)
2375 {
2376 	struct bpf_prog_aux *aux = env->prog->aux;
2377 	struct btf *btf = aux->btf;
2378 	const struct btf_type *t;
2379 	u32 main_btf_id, id;
2380 	const char *name;
2381 	int ret, i;
2382 
2383 	/* Non-zero func_info_cnt implies valid btf */
2384 	if (!aux->func_info_cnt)
2385 		return 0;
2386 	main_btf_id = aux->func_info[0].type_id;
2387 
2388 	t = btf_type_by_id(btf, main_btf_id);
2389 	if (!t) {
2390 		verbose(env, "invalid btf id for main subprog in func_info\n");
2391 		return -EINVAL;
2392 	}
2393 
2394 	name = btf_find_decl_tag_value(btf, t, -1, "exception_callback:");
2395 	if (IS_ERR(name)) {
2396 		ret = PTR_ERR(name);
2397 		/* If there is no tag present, there is no exception callback */
2398 		if (ret == -ENOENT)
2399 			ret = 0;
2400 		else if (ret == -EEXIST)
2401 			verbose(env, "multiple exception callback tags for main subprog\n");
2402 		return ret;
2403 	}
2404 
2405 	ret = btf_find_by_name_kind(btf, name, BTF_KIND_FUNC);
2406 	if (ret < 0) {
2407 		verbose(env, "exception callback '%s' could not be found in BTF\n", name);
2408 		return ret;
2409 	}
2410 	id = ret;
2411 	t = btf_type_by_id(btf, id);
2412 	if (btf_func_linkage(t) != BTF_FUNC_GLOBAL) {
2413 		verbose(env, "exception callback '%s' must have global linkage\n", name);
2414 		return -EINVAL;
2415 	}
2416 	ret = 0;
2417 	for (i = 0; i < aux->func_info_cnt; i++) {
2418 		if (aux->func_info[i].type_id != id)
2419 			continue;
2420 		ret = aux->func_info[i].insn_off;
2421 		/* Further func_info and subprog checks will also happen
2422 		 * later, so assume this is the right insn_off for now.
2423 		 */
2424 		if (!ret) {
2425 			verbose(env, "invalid exception callback insn_off in func_info: 0\n");
2426 			ret = -EINVAL;
2427 		}
2428 	}
2429 	if (!ret) {
2430 		verbose(env, "exception callback type id not found in func_info\n");
2431 		ret = -EINVAL;
2432 	}
2433 	return ret;
2434 }
2435 
2436 #define MAX_KFUNC_BTFS	256
2437 
2438 struct bpf_kfunc_btf {
2439 	struct btf *btf;
2440 	struct module *module;
2441 	u16 offset;
2442 };
2443 
2444 struct bpf_kfunc_btf_tab {
2445 	struct bpf_kfunc_btf descs[MAX_KFUNC_BTFS];
2446 	u32 nr_descs;
2447 };
2448 
2449 static int kfunc_desc_cmp_by_id_off(const void *a, const void *b)
2450 {
2451 	const struct bpf_kfunc_desc *d0 = a;
2452 	const struct bpf_kfunc_desc *d1 = b;
2453 
2454 	/* func_id is not greater than BTF_MAX_TYPE */
2455 	return d0->func_id - d1->func_id ?: d0->offset - d1->offset;
2456 }
2457 
2458 static int kfunc_btf_cmp_by_off(const void *a, const void *b)
2459 {
2460 	const struct bpf_kfunc_btf *d0 = a;
2461 	const struct bpf_kfunc_btf *d1 = b;
2462 
2463 	return d0->offset - d1->offset;
2464 }
2465 
2466 static struct bpf_kfunc_desc *
2467 find_kfunc_desc(const struct bpf_prog *prog, u32 func_id, u16 offset)
2468 {
2469 	struct bpf_kfunc_desc desc = {
2470 		.func_id = func_id,
2471 		.offset = offset,
2472 	};
2473 	struct bpf_kfunc_desc_tab *tab;
2474 
2475 	tab = prog->aux->kfunc_tab;
2476 	return bsearch(&desc, tab->descs, tab->nr_descs,
2477 		       sizeof(tab->descs[0]), kfunc_desc_cmp_by_id_off);
2478 }
2479 
2480 int bpf_get_kfunc_addr(const struct bpf_prog *prog, u32 func_id,
2481 		       u16 btf_fd_idx, u8 **func_addr)
2482 {
2483 	const struct bpf_kfunc_desc *desc;
2484 
2485 	desc = find_kfunc_desc(prog, func_id, btf_fd_idx);
2486 	if (!desc)
2487 		return -EFAULT;
2488 
2489 	*func_addr = (u8 *)desc->addr;
2490 	return 0;
2491 }
2492 
2493 static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env *env,
2494 					 s16 offset)
2495 {
2496 	struct bpf_kfunc_btf kf_btf = { .offset = offset };
2497 	struct bpf_kfunc_btf_tab *tab;
2498 	struct bpf_kfunc_btf *b;
2499 	struct module *mod;
2500 	struct btf *btf;
2501 	int btf_fd;
2502 
2503 	tab = env->prog->aux->kfunc_btf_tab;
2504 	b = bsearch(&kf_btf, tab->descs, tab->nr_descs,
2505 		    sizeof(tab->descs[0]), kfunc_btf_cmp_by_off);
2506 	if (!b) {
2507 		if (tab->nr_descs == MAX_KFUNC_BTFS) {
2508 			verbose(env, "too many different module BTFs\n");
2509 			return ERR_PTR(-E2BIG);
2510 		}
2511 
2512 		if (bpfptr_is_null(env->fd_array)) {
2513 			verbose(env, "kfunc offset > 0 without fd_array is invalid\n");
2514 			return ERR_PTR(-EPROTO);
2515 		}
2516 
2517 		if (copy_from_bpfptr_offset(&btf_fd, env->fd_array,
2518 					    offset * sizeof(btf_fd),
2519 					    sizeof(btf_fd)))
2520 			return ERR_PTR(-EFAULT);
2521 
2522 		btf = btf_get_by_fd(btf_fd);
2523 		if (IS_ERR(btf)) {
2524 			verbose(env, "invalid module BTF fd specified\n");
2525 			return btf;
2526 		}
2527 
2528 		if (!btf_is_module(btf)) {
2529 			verbose(env, "BTF fd for kfunc is not a module BTF\n");
2530 			btf_put(btf);
2531 			return ERR_PTR(-EINVAL);
2532 		}
2533 
2534 		mod = btf_try_get_module(btf);
2535 		if (!mod) {
2536 			btf_put(btf);
2537 			return ERR_PTR(-ENXIO);
2538 		}
2539 
2540 		b = &tab->descs[tab->nr_descs++];
2541 		b->btf = btf;
2542 		b->module = mod;
2543 		b->offset = offset;
2544 
2545 		/* sort() reorders entries by value, so b may no longer point
2546 		 * to the right entry after this
2547 		 */
2548 		sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
2549 		     kfunc_btf_cmp_by_off, NULL);
2550 	} else {
2551 		btf = b->btf;
2552 	}
2553 
2554 	return btf;
2555 }
2556 
2557 void bpf_free_kfunc_btf_tab(struct bpf_kfunc_btf_tab *tab)
2558 {
2559 	if (!tab)
2560 		return;
2561 
2562 	while (tab->nr_descs--) {
2563 		module_put(tab->descs[tab->nr_descs].module);
2564 		btf_put(tab->descs[tab->nr_descs].btf);
2565 	}
2566 	kfree(tab);
2567 }
2568 
2569 static struct btf *find_kfunc_desc_btf(struct bpf_verifier_env *env, s16 offset)
2570 {
2571 	if (offset) {
2572 		if (offset < 0) {
2573 			/* In the future, this can be allowed to increase limit
2574 			 * of fd index into fd_array, interpreted as u16.
2575 			 */
2576 			verbose(env, "negative offset disallowed for kernel module function call\n");
2577 			return ERR_PTR(-EINVAL);
2578 		}
2579 
2580 		return __find_kfunc_desc_btf(env, offset);
2581 	}
2582 	return btf_vmlinux ?: ERR_PTR(-ENOENT);
2583 }
2584 
2585 #define KF_IMPL_SUFFIX "_impl"
2586 
2587 static const struct btf_type *find_kfunc_impl_proto(struct bpf_verifier_env *env,
2588 						    struct btf *btf,
2589 						    const char *func_name)
2590 {
2591 	char *buf = env->tmp_str_buf;
2592 	const struct btf_type *func;
2593 	s32 impl_id;
2594 	int len;
2595 
2596 	len = snprintf(buf, TMP_STR_BUF_LEN, "%s%s", func_name, KF_IMPL_SUFFIX);
2597 	if (len < 0 || len >= TMP_STR_BUF_LEN) {
2598 		verbose(env, "function name %s%s is too long\n", func_name, KF_IMPL_SUFFIX);
2599 		return NULL;
2600 	}
2601 
2602 	impl_id = btf_find_by_name_kind(btf, buf, BTF_KIND_FUNC);
2603 	if (impl_id <= 0) {
2604 		verbose(env, "cannot find function %s in BTF\n", buf);
2605 		return NULL;
2606 	}
2607 
2608 	func = btf_type_by_id(btf, impl_id);
2609 
2610 	return btf_type_by_id(btf, func->type);
2611 }
2612 
2613 static int fetch_kfunc_meta(struct bpf_verifier_env *env,
2614 			    s32 func_id,
2615 			    s16 offset,
2616 			    struct bpf_kfunc_meta *kfunc)
2617 {
2618 	const struct btf_type *func, *func_proto;
2619 	const char *func_name;
2620 	u32 *kfunc_flags;
2621 	struct btf *btf;
2622 
2623 	if (func_id <= 0) {
2624 		verbose(env, "invalid kernel function btf_id %d\n", func_id);
2625 		return -EINVAL;
2626 	}
2627 
2628 	btf = find_kfunc_desc_btf(env, offset);
2629 	if (IS_ERR(btf)) {
2630 		verbose(env, "failed to find BTF for kernel function\n");
2631 		return PTR_ERR(btf);
2632 	}
2633 
2634 	/*
2635 	 * Note that kfunc_flags may be NULL at this point, which
2636 	 * means that we couldn't find func_id in any relevant
2637 	 * kfunc_id_set. This most likely indicates an invalid kfunc
2638 	 * call.  However we don't fail with an error here,
2639 	 * and let the caller decide what to do with NULL kfunc->flags.
2640 	 */
2641 	kfunc_flags = btf_kfunc_flags(btf, func_id, env->prog);
2642 
2643 	func = btf_type_by_id(btf, func_id);
2644 	if (!func || !btf_type_is_func(func)) {
2645 		verbose(env, "kernel btf_id %d is not a function\n", func_id);
2646 		return -EINVAL;
2647 	}
2648 
2649 	func_name = btf_name_by_offset(btf, func->name_off);
2650 
2651 	/*
2652 	 * An actual prototype of a kfunc with KF_IMPLICIT_ARGS flag
2653 	 * can be found through the counterpart _impl kfunc.
2654 	 */
2655 	if (kfunc_flags && (*kfunc_flags & KF_IMPLICIT_ARGS))
2656 		func_proto = find_kfunc_impl_proto(env, btf, func_name);
2657 	else
2658 		func_proto = btf_type_by_id(btf, func->type);
2659 
2660 	if (!func_proto || !btf_type_is_func_proto(func_proto)) {
2661 		verbose(env, "kernel function btf_id %d does not have a valid func_proto\n",
2662 			func_id);
2663 		return -EINVAL;
2664 	}
2665 
2666 	memset(kfunc, 0, sizeof(*kfunc));
2667 	kfunc->btf = btf;
2668 	kfunc->id = func_id;
2669 	kfunc->name = func_name;
2670 	kfunc->proto = func_proto;
2671 	kfunc->flags = kfunc_flags;
2672 
2673 	return 0;
2674 }
2675 
2676 int bpf_add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, u16 offset)
2677 {
2678 	struct bpf_kfunc_btf_tab *btf_tab;
2679 	struct btf_func_model func_model;
2680 	struct bpf_kfunc_desc_tab *tab;
2681 	struct bpf_prog_aux *prog_aux;
2682 	struct bpf_kfunc_meta kfunc;
2683 	struct bpf_kfunc_desc *desc;
2684 	unsigned long addr;
2685 	int err;
2686 
2687 	prog_aux = env->prog->aux;
2688 	tab = prog_aux->kfunc_tab;
2689 	btf_tab = prog_aux->kfunc_btf_tab;
2690 	if (!tab) {
2691 		if (!btf_vmlinux) {
2692 			verbose(env, "calling kernel function is not supported without CONFIG_DEBUG_INFO_BTF\n");
2693 			return -ENOTSUPP;
2694 		}
2695 
2696 		if (!env->prog->jit_requested) {
2697 			verbose(env, "JIT is required for calling kernel function\n");
2698 			return -ENOTSUPP;
2699 		}
2700 
2701 		if (!bpf_jit_supports_kfunc_call()) {
2702 			verbose(env, "JIT does not support calling kernel function\n");
2703 			return -ENOTSUPP;
2704 		}
2705 
2706 		if (!env->prog->gpl_compatible) {
2707 			verbose(env, "cannot call kernel function from non-GPL compatible program\n");
2708 			return -EINVAL;
2709 		}
2710 
2711 		tab = kzalloc_obj(*tab, GFP_KERNEL_ACCOUNT);
2712 		if (!tab)
2713 			return -ENOMEM;
2714 		prog_aux->kfunc_tab = tab;
2715 	}
2716 
2717 	/* func_id == 0 is always invalid, but instead of returning an error, be
2718 	 * conservative and wait until the code elimination pass before returning
2719 	 * error, so that invalid calls that get pruned out can be in BPF programs
2720 	 * loaded from userspace.  It is also required that offset be untouched
2721 	 * for such calls.
2722 	 */
2723 	if (!func_id && !offset)
2724 		return 0;
2725 
2726 	if (!btf_tab && offset) {
2727 		btf_tab = kzalloc_obj(*btf_tab, GFP_KERNEL_ACCOUNT);
2728 		if (!btf_tab)
2729 			return -ENOMEM;
2730 		prog_aux->kfunc_btf_tab = btf_tab;
2731 	}
2732 
2733 	if (find_kfunc_desc(env->prog, func_id, offset))
2734 		return 0;
2735 
2736 	if (tab->nr_descs == MAX_KFUNC_DESCS) {
2737 		verbose(env, "too many different kernel function calls\n");
2738 		return -E2BIG;
2739 	}
2740 
2741 	err = fetch_kfunc_meta(env, func_id, offset, &kfunc);
2742 	if (err)
2743 		return err;
2744 
2745 	addr = kallsyms_lookup_name(kfunc.name);
2746 	if (!addr) {
2747 		verbose(env, "cannot find address for kernel function %s\n", kfunc.name);
2748 		return -EINVAL;
2749 	}
2750 
2751 	if (bpf_dev_bound_kfunc_id(func_id)) {
2752 		err = bpf_dev_bound_kfunc_check(&env->log, prog_aux);
2753 		if (err)
2754 			return err;
2755 	}
2756 
2757 	err = btf_distill_func_proto(&env->log, kfunc.btf, kfunc.proto, kfunc.name, &func_model);
2758 	if (err)
2759 		return err;
2760 
2761 	desc = &tab->descs[tab->nr_descs++];
2762 	desc->func_id = func_id;
2763 	desc->offset = offset;
2764 	desc->addr = addr;
2765 	desc->func_model = func_model;
2766 	sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
2767 	     kfunc_desc_cmp_by_id_off, NULL);
2768 	return 0;
2769 }
2770 
2771 bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog)
2772 {
2773 	return !!prog->aux->kfunc_tab;
2774 }
2775 
2776 static int add_subprog_and_kfunc(struct bpf_verifier_env *env)
2777 {
2778 	struct bpf_subprog_info *subprog = env->subprog_info;
2779 	int i, ret, insn_cnt = env->prog->len, ex_cb_insn;
2780 	struct bpf_insn *insn = env->prog->insnsi;
2781 
2782 	/* Add entry function. */
2783 	ret = add_subprog(env, 0);
2784 	if (ret)
2785 		return ret;
2786 
2787 	for (i = 0; i < insn_cnt; i++, insn++) {
2788 		if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn) &&
2789 		    !bpf_pseudo_kfunc_call(insn))
2790 			continue;
2791 
2792 		if (!env->bpf_capable) {
2793 			verbose(env, "loading/calling other bpf or kernel functions are allowed for CAP_BPF and CAP_SYS_ADMIN\n");
2794 			return -EPERM;
2795 		}
2796 
2797 		if (bpf_pseudo_func(insn) || bpf_pseudo_call(insn))
2798 			ret = add_subprog(env, i + insn->imm + 1);
2799 		else
2800 			ret = bpf_add_kfunc_call(env, insn->imm, insn->off);
2801 
2802 		if (ret < 0)
2803 			return ret;
2804 	}
2805 
2806 	ret = bpf_find_exception_callback_insn_off(env);
2807 	if (ret < 0)
2808 		return ret;
2809 	ex_cb_insn = ret;
2810 
2811 	/* If ex_cb_insn > 0, this means that the main program has a subprog
2812 	 * marked using BTF decl tag to serve as the exception callback.
2813 	 */
2814 	if (ex_cb_insn) {
2815 		ret = add_subprog(env, ex_cb_insn);
2816 		if (ret < 0)
2817 			return ret;
2818 		for (i = 1; i < env->subprog_cnt; i++) {
2819 			if (env->subprog_info[i].start != ex_cb_insn)
2820 				continue;
2821 			env->exception_callback_subprog = i;
2822 			bpf_mark_subprog_exc_cb(env, i);
2823 			break;
2824 		}
2825 	}
2826 
2827 	/* Add a fake 'exit' subprog which could simplify subprog iteration
2828 	 * logic. 'subprog_cnt' should not be increased.
2829 	 */
2830 	subprog[env->subprog_cnt].start = insn_cnt;
2831 
2832 	if (env->log.level & BPF_LOG_LEVEL2)
2833 		for (i = 0; i < env->subprog_cnt; i++)
2834 			verbose(env, "func#%d @%d\n", i, subprog[i].start);
2835 
2836 	return 0;
2837 }
2838 
2839 static int check_subprogs(struct bpf_verifier_env *env)
2840 {
2841 	int i, subprog_start, subprog_end, off, cur_subprog = 0;
2842 	struct bpf_subprog_info *subprog = env->subprog_info;
2843 	struct bpf_insn *insn = env->prog->insnsi;
2844 	int insn_cnt = env->prog->len;
2845 
2846 	/* now check that all jumps are within the same subprog */
2847 	subprog_start = subprog[cur_subprog].start;
2848 	subprog_end = subprog[cur_subprog + 1].start;
2849 	for (i = 0; i < insn_cnt; i++) {
2850 		u8 code = insn[i].code;
2851 
2852 		if (code == (BPF_JMP | BPF_CALL) &&
2853 		    insn[i].src_reg == 0 &&
2854 		    insn[i].imm == BPF_FUNC_tail_call) {
2855 			subprog[cur_subprog].has_tail_call = true;
2856 			subprog[cur_subprog].tail_call_reachable = true;
2857 		}
2858 		if (BPF_CLASS(code) == BPF_LD &&
2859 		    (BPF_MODE(code) == BPF_ABS || BPF_MODE(code) == BPF_IND))
2860 			subprog[cur_subprog].has_ld_abs = true;
2861 		if (BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32)
2862 			goto next;
2863 		if (BPF_OP(code) == BPF_CALL)
2864 			goto next;
2865 		if (BPF_OP(code) == BPF_EXIT) {
2866 			subprog[cur_subprog].exit_idx = i;
2867 			goto next;
2868 		}
2869 		off = i + bpf_jmp_offset(&insn[i]) + 1;
2870 		if (off < subprog_start || off >= subprog_end) {
2871 			verbose(env, "jump out of range from insn %d to %d\n", i, off);
2872 			return -EINVAL;
2873 		}
2874 next:
2875 		if (i == subprog_end - 1) {
2876 			/* to avoid fall-through from one subprog into another
2877 			 * the last insn of the subprog should be either exit
2878 			 * or unconditional jump back or bpf_throw call
2879 			 */
2880 			if (code != (BPF_JMP | BPF_EXIT) &&
2881 			    code != (BPF_JMP32 | BPF_JA) &&
2882 			    code != (BPF_JMP | BPF_JA)) {
2883 				verbose(env, "last insn is not an exit or jmp\n");
2884 				return -EINVAL;
2885 			}
2886 			subprog_start = subprog_end;
2887 			cur_subprog++;
2888 			if (cur_subprog < env->subprog_cnt)
2889 				subprog_end = subprog[cur_subprog + 1].start;
2890 		}
2891 	}
2892 	return 0;
2893 }
2894 
2895 /*
2896  * Sort subprogs in topological order so that leaf subprogs come first and
2897  * their callers come later. This is a DFS post-order traversal of the call
2898  * graph. Scan only reachable instructions (those in the computed postorder) of
2899  * the current subprog to discover callees (direct subprogs and sync
2900  * callbacks).
2901  */
2902 static int sort_subprogs_topo(struct bpf_verifier_env *env)
2903 {
2904 	struct bpf_subprog_info *si = env->subprog_info;
2905 	int *insn_postorder = env->cfg.insn_postorder;
2906 	struct bpf_insn *insn = env->prog->insnsi;
2907 	int cnt = env->subprog_cnt;
2908 	int *dfs_stack = NULL;
2909 	int top = 0, order = 0;
2910 	int i, ret = 0;
2911 	u8 *color = NULL;
2912 
2913 	color = kvzalloc_objs(*color, cnt, GFP_KERNEL_ACCOUNT);
2914 	dfs_stack = kvmalloc_objs(*dfs_stack, cnt, GFP_KERNEL_ACCOUNT);
2915 	if (!color || !dfs_stack) {
2916 		ret = -ENOMEM;
2917 		goto out;
2918 	}
2919 
2920 	/*
2921 	 * DFS post-order traversal.
2922 	 * Color values: 0 = unvisited, 1 = on stack, 2 = done.
2923 	 */
2924 	for (i = 0; i < cnt; i++) {
2925 		if (color[i])
2926 			continue;
2927 		color[i] = 1;
2928 		dfs_stack[top++] = i;
2929 
2930 		while (top > 0) {
2931 			int cur = dfs_stack[top - 1];
2932 			int po_start = si[cur].postorder_start;
2933 			int po_end = si[cur + 1].postorder_start;
2934 			bool pushed = false;
2935 			int j;
2936 
2937 			for (j = po_start; j < po_end; j++) {
2938 				int idx = insn_postorder[j];
2939 				int callee;
2940 
2941 				if (!bpf_pseudo_call(&insn[idx]) && !bpf_pseudo_func(&insn[idx]))
2942 					continue;
2943 				callee = bpf_find_subprog(env, idx + insn[idx].imm + 1);
2944 				if (callee < 0) {
2945 					ret = -EFAULT;
2946 					goto out;
2947 				}
2948 				if (color[callee] == 2)
2949 					continue;
2950 				if (color[callee] == 1) {
2951 					if (bpf_pseudo_func(&insn[idx]))
2952 						continue;
2953 					verbose(env, "recursive call from %s() to %s()\n",
2954 						subprog_name(env, cur),
2955 						subprog_name(env, callee));
2956 					ret = -EINVAL;
2957 					goto out;
2958 				}
2959 				color[callee] = 1;
2960 				dfs_stack[top++] = callee;
2961 				pushed = true;
2962 				break;
2963 			}
2964 
2965 			if (!pushed) {
2966 				color[cur] = 2;
2967 				env->subprog_topo_order[order++] = cur;
2968 				top--;
2969 			}
2970 		}
2971 	}
2972 
2973 	if (env->log.level & BPF_LOG_LEVEL2)
2974 		for (i = 0; i < cnt; i++)
2975 			verbose(env, "topo_order[%d] = %s\n",
2976 				i, subprog_name(env, env->subprog_topo_order[i]));
2977 out:
2978 	kvfree(dfs_stack);
2979 	kvfree(color);
2980 	return ret;
2981 }
2982 
/*
 * Flag nr_slots stack slots as scratched, starting at slot index spi and
 * walking towards lower slot indices (spi, spi - 1, ...).
 */
static void mark_stack_slots_scratched(struct bpf_verifier_env *env,
				       int spi, int nr_slots)
{
	int slot = spi;
	int remaining = nr_slots;

	while (remaining-- > 0)
		mark_stack_slot_scratched(env, slot--);
}
2991 
2992 /* This function is supposed to be used by the following 32-bit optimization
2993  * code only. It returns TRUE if the source or destination register operates
2994  * on 64-bit, otherwise return FALSE.
2995  */
2996 bool bpf_is_reg64(struct bpf_insn *insn,
2997 	      u32 regno, struct bpf_reg_state *reg, enum bpf_reg_arg_type t)
2998 {
2999 	u8 code, class, op;
3000 
3001 	code = insn->code;
3002 	class = BPF_CLASS(code);
3003 	op = BPF_OP(code);
3004 	if (class == BPF_JMP) {
3005 		/* BPF_EXIT for "main" will reach here. Return TRUE
3006 		 * conservatively.
3007 		 */
3008 		if (op == BPF_EXIT)
3009 			return true;
3010 		if (op == BPF_CALL) {
3011 			/* BPF to BPF call will reach here because of marking
3012 			 * caller saved clobber with DST_OP_NO_MARK for which we
3013 			 * don't care the register def because they are anyway
3014 			 * marked as NOT_INIT already.
3015 			 */
3016 			if (insn->src_reg == BPF_PSEUDO_CALL)
3017 				return false;
3018 			/* Helper call will reach here because of arg type
3019 			 * check, conservatively return TRUE.
3020 			 */
3021 			if (t == SRC_OP)
3022 				return true;
3023 
3024 			return false;
3025 		}
3026 	}
3027 
3028 	if (class == BPF_ALU64 && op == BPF_END && (insn->imm == 16 || insn->imm == 32))
3029 		return false;
3030 
3031 	if (class == BPF_ALU64 || class == BPF_JMP ||
3032 	    (class == BPF_ALU && op == BPF_END && insn->imm == 64))
3033 		return true;
3034 
3035 	if (class == BPF_ALU || class == BPF_JMP32)
3036 		return false;
3037 
3038 	if (class == BPF_LDX) {
3039 		if (t != SRC_OP)
3040 			return BPF_SIZE(code) == BPF_DW || BPF_MODE(code) == BPF_MEMSX;
3041 		/* LDX source must be ptr. */
3042 		return true;
3043 	}
3044 
3045 	if (class == BPF_STX) {
3046 		/* BPF_STX (including atomic variants) has one or more source
3047 		 * operands, one of which is a ptr. Check whether the caller is
3048 		 * asking about it.
3049 		 */
3050 		if (t == SRC_OP && reg->type != SCALAR_VALUE)
3051 			return true;
3052 		return BPF_SIZE(code) == BPF_DW;
3053 	}
3054 
3055 	if (class == BPF_LD) {
3056 		u8 mode = BPF_MODE(code);
3057 
3058 		/* LD_IMM64 */
3059 		if (mode == BPF_IMM)
3060 			return true;
3061 
3062 		/* Both LD_IND and LD_ABS return 32-bit data. */
3063 		if (t != SRC_OP)
3064 			return  false;
3065 
3066 		/* Implicit ctx ptr. */
3067 		if (regno == BPF_REG_6)
3068 			return true;
3069 
3070 		/* Explicit source could be any width. */
3071 		return true;
3072 	}
3073 
3074 	if (class == BPF_ST)
3075 		/* The only source register for BPF_ST is a ptr. */
3076 		return true;
3077 
3078 	/* Conservatively return true at default. */
3079 	return true;
3080 }
3081 
3082 static void mark_insn_zext(struct bpf_verifier_env *env,
3083 			   struct bpf_reg_state *reg)
3084 {
3085 	s32 def_idx = reg->subreg_def;
3086 
3087 	if (def_idx == DEF_NOT_SUBREG)
3088 		return;
3089 
3090 	env->insn_aux_data[def_idx - 1].zext_dst = true;
3091 	/* The dst will be zero extended, so won't be sub-register anymore. */
3092 	reg->subreg_def = DEF_NOT_SUBREG;
3093 }
3094 
3095 static int __check_reg_arg(struct bpf_verifier_env *env, struct bpf_reg_state *regs, u32 regno,
3096 			   enum bpf_reg_arg_type t)
3097 {
3098 	struct bpf_insn *insn = env->prog->insnsi + env->insn_idx;
3099 	struct bpf_reg_state *reg;
3100 	bool rw64;
3101 
3102 	mark_reg_scratched(env, regno);
3103 
3104 	reg = &regs[regno];
3105 	rw64 = bpf_is_reg64(insn, regno, reg, t);
3106 	if (t == SRC_OP) {
3107 		/* check whether register used as source operand can be read */
3108 		if (reg->type == NOT_INIT) {
3109 			verbose(env, "R%d !read_ok\n", regno);
3110 			return -EACCES;
3111 		}
3112 		/* We don't need to worry about FP liveness because it's read-only */
3113 		if (regno == BPF_REG_FP)
3114 			return 0;
3115 
3116 		if (rw64)
3117 			mark_insn_zext(env, reg);
3118 
3119 		return 0;
3120 	} else {
3121 		/* check whether register used as dest operand can be written to */
3122 		if (regno == BPF_REG_FP) {
3123 			verbose(env, "frame pointer is read only\n");
3124 			return -EACCES;
3125 		}
3126 		reg->subreg_def = rw64 ? DEF_NOT_SUBREG : env->insn_idx + 1;
3127 		if (t == DST_OP)
3128 			mark_reg_unknown(env, regs, regno);
3129 	}
3130 	return 0;
3131 }
3132 
3133 static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
3134 			 enum bpf_reg_arg_type t)
3135 {
3136 	struct bpf_verifier_state *vstate = env->cur_state;
3137 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
3138 
3139 	return __check_reg_arg(env, state->regs, regno, t);
3140 }
3141 
3142 static void mark_indirect_target(struct bpf_verifier_env *env, int idx)
3143 {
3144 	env->insn_aux_data[idx].indirect_target = true;
3145 }
3146 
/*
 * Bit-layout constants for packing a set of linked registers into one u64
 * (see linked_regs_pack() / linked_regs_unpack()).
 *
 * One packed entry is LR_ENTRY_BITS wide and holds, from the low bits up:
 * the frame number (LR_FRAMENO_BITS), the stack slot index or register
 * number (LR_SPI_BITS, starting at bit LR_SPI_OFF) and a one-bit "is
 * register" flag at bit LR_IS_REG_OFF. The entry count takes LR_SIZE_BITS.
 * LINKED_REGS_MAX is the maximum number of entries in one set.
 */
#define LR_FRAMENO_BITS	4
#define LR_SPI_BITS	6
#define LR_ENTRY_BITS	(LR_SPI_BITS + LR_FRAMENO_BITS + 1)
#define LR_SIZE_BITS	4
#define LR_FRAMENO_MASK	((1ull << LR_FRAMENO_BITS) - 1)
#define LR_SPI_MASK	((1ull << LR_SPI_BITS)     - 1)
#define LR_SIZE_MASK	((1ull << LR_SIZE_BITS)    - 1)
#define LR_SPI_OFF	LR_FRAMENO_BITS
#define LR_IS_REG_OFF	(LR_SPI_BITS + LR_FRAMENO_BITS)
#define LINKED_REGS_MAX	5

/* Every frame number must be representable in the frameno field. */
static_assert(MAX_CALL_FRAMES <= (1 << LR_FRAMENO_BITS));
/* The entry count must be representable in the size field. */
static_assert(LINKED_REGS_MAX < (1 << LR_SIZE_BITS));
/* All entries plus the size field must fit in a single u64. */
static_assert(LINKED_REGS_MAX * LR_ENTRY_BITS + LR_SIZE_BITS <= 64);
3161 
/*
 * One member of a set of linked registers: either a register or a stack
 * slot, located in call frame 'frameno'.
 */
struct linked_reg {
	u8 frameno;
	/* spi and regno share storage; is_reg tells which one is meant. */
	union {
		u8 spi;
		u8 regno;
	};
	/* true: 'regno' is valid; false: 'spi' is valid. */
	bool is_reg;
};
3170 
/* Fixed-capacity collection of linked registers / stack slots. */
struct linked_regs {
	/* number of valid elements at the start of entries[] */
	int cnt;
	struct linked_reg entries[LINKED_REGS_MAX];
};
3175 
3176 static struct linked_reg *linked_regs_push(struct linked_regs *s)
3177 {
3178 	if (s->cnt < LINKED_REGS_MAX)
3179 		return &s->entries[s->cnt++];
3180 
3181 	return NULL;
3182 }
3183 
3184 /*
3185  * Use u64 as a vector of 5 11-bit values, use first 4-bits to track
3186  * number of elements currently in stack.
3187  * Pack one history entry for linked registers as 11 bits in the following format:
3188  * - 4-bits frameno
3189  * - 6-bits spi_or_reg
3190  * - 1-bit  is_reg
3191  */
3192 static u64 linked_regs_pack(struct linked_regs *s)
3193 {
3194 	u64 val = 0;
3195 	int i;
3196 
3197 	for (i = 0; i < s->cnt; ++i) {
3198 		struct linked_reg *e = &s->entries[i];
3199 		u64 tmp = 0;
3200 
3201 		tmp |= e->frameno;
3202 		tmp |= e->spi << LR_SPI_OFF;
3203 		tmp |= (e->is_reg ? 1 : 0) << LR_IS_REG_OFF;
3204 
3205 		val <<= LR_ENTRY_BITS;
3206 		val |= tmp;
3207 	}
3208 	val <<= LR_SIZE_BITS;
3209 	val |= s->cnt;
3210 	return val;
3211 }
3212 
3213 static void linked_regs_unpack(u64 val, struct linked_regs *s)
3214 {
3215 	int i;
3216 
3217 	s->cnt = val & LR_SIZE_MASK;
3218 	val >>= LR_SIZE_BITS;
3219 
3220 	for (i = 0; i < s->cnt; ++i) {
3221 		struct linked_reg *e = &s->entries[i];
3222 
3223 		e->frameno =  val & LR_FRAMENO_MASK;
3224 		e->spi     = (val >> LR_SPI_OFF) & LR_SPI_MASK;
3225 		e->is_reg  = (val >> LR_IS_REG_OFF) & 0x1;
3226 		val >>= LR_ENTRY_BITS;
3227 	}
3228 }
3229 
3230 static const char *disasm_kfunc_name(void *data, const struct bpf_insn *insn)
3231 {
3232 	const struct btf_type *func;
3233 	struct btf *desc_btf;
3234 
3235 	if (insn->src_reg != BPF_PSEUDO_KFUNC_CALL)
3236 		return NULL;
3237 
3238 	desc_btf = find_kfunc_desc_btf(data, insn->off);
3239 	if (IS_ERR(desc_btf))
3240 		return "<error>";
3241 
3242 	func = btf_type_by_id(desc_btf, insn->imm);
3243 	return btf_name_by_offset(desc_btf, func->name_off);
3244 }
3245 
3246 void bpf_verbose_insn(struct bpf_verifier_env *env, struct bpf_insn *insn)
3247 {
3248 	const struct bpf_insn_cbs cbs = {
3249 		.cb_call	= disasm_kfunc_name,
3250 		.cb_print	= verbose,
3251 		.private_data	= env,
3252 	};
3253 
3254 	print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
3255 }
3256 
3257 /* If any register R in hist->linked_regs is marked as precise in bt,
3258  * do bt_set_frame_{reg,slot}(bt, R) for all registers in hist->linked_regs.
3259  */
3260 void bpf_bt_sync_linked_regs(struct backtrack_state *bt, struct bpf_jmp_history_entry *hist)
3261 {
3262 	struct linked_regs linked_regs;
3263 	bool some_precise = false;
3264 	int i;
3265 
3266 	if (!hist || hist->linked_regs == 0)
3267 		return;
3268 
3269 	linked_regs_unpack(hist->linked_regs, &linked_regs);
3270 	for (i = 0; i < linked_regs.cnt; ++i) {
3271 		struct linked_reg *e = &linked_regs.entries[i];
3272 
3273 		if ((e->is_reg && bt_is_frame_reg_set(bt, e->frameno, e->regno)) ||
3274 		    (!e->is_reg && bt_is_frame_slot_set(bt, e->frameno, e->spi))) {
3275 			some_precise = true;
3276 			break;
3277 		}
3278 	}
3279 
3280 	if (!some_precise)
3281 		return;
3282 
3283 	for (i = 0; i < linked_regs.cnt; ++i) {
3284 		struct linked_reg *e = &linked_regs.entries[i];
3285 
3286 		if (e->is_reg)
3287 			bpf_bt_set_frame_reg(bt, e->frameno, e->regno);
3288 		else
3289 			bpf_bt_set_frame_slot(bt, e->frameno, e->spi);
3290 	}
3291 }
3292 
3293 int mark_chain_precision(struct bpf_verifier_env *env, int regno)
3294 {
3295 	return bpf_mark_chain_precision(env, env->cur_state, regno, NULL);
3296 }
3297 
3298 /* mark_chain_precision_batch() assumes that env->bt is set in the caller to
3299  * desired reg and stack masks across all relevant frames
3300  */
3301 static int mark_chain_precision_batch(struct bpf_verifier_env *env,
3302 				      struct bpf_verifier_state *starting_state)
3303 {
3304 	return bpf_mark_chain_precision(env, starting_state, -1, NULL);
3305 }
3306 
3307 static bool is_spillable_regtype(enum bpf_reg_type type)
3308 {
3309 	switch (base_type(type)) {
3310 	case PTR_TO_MAP_VALUE:
3311 	case PTR_TO_STACK:
3312 	case PTR_TO_CTX:
3313 	case PTR_TO_PACKET:
3314 	case PTR_TO_PACKET_META:
3315 	case PTR_TO_PACKET_END:
3316 	case PTR_TO_FLOW_KEYS:
3317 	case CONST_PTR_TO_MAP:
3318 	case PTR_TO_SOCKET:
3319 	case PTR_TO_SOCK_COMMON:
3320 	case PTR_TO_TCP_SOCK:
3321 	case PTR_TO_XDP_SOCK:
3322 	case PTR_TO_BTF_ID:
3323 	case PTR_TO_BUF:
3324 	case PTR_TO_MEM:
3325 	case PTR_TO_FUNC:
3326 	case PTR_TO_MAP_KEY:
3327 	case PTR_TO_ARENA:
3328 		return true;
3329 	default:
3330 		return false;
3331 	}
3332 }
3333 
3334 
3335 /* check if register is a constant scalar value */
3336 static bool is_reg_const(struct bpf_reg_state *reg, bool subreg32)
3337 {
3338 	return reg->type == SCALAR_VALUE &&
3339 	       tnum_is_const(subreg32 ? tnum_subreg(reg->var_off) : reg->var_off);
3340 }
3341 
3342 /* assuming is_reg_const() is true, return constant value of a register */
3343 static u64 reg_const_value(struct bpf_reg_state *reg, bool subreg32)
3344 {
3345 	return subreg32 ? tnum_subreg(reg->var_off).value : reg->var_off.value;
3346 }
3347 
3348 static bool __is_pointer_value(bool allow_ptr_leaks,
3349 			       const struct bpf_reg_state *reg)
3350 {
3351 	if (allow_ptr_leaks)
3352 		return false;
3353 
3354 	return reg->type != SCALAR_VALUE;
3355 }
3356 
3357 static void clear_scalar_id(struct bpf_reg_state *reg)
3358 {
3359 	reg->id = 0;
3360 	reg->delta = 0;
3361 }
3362 
3363 static void assign_scalar_id_before_mov(struct bpf_verifier_env *env,
3364 					struct bpf_reg_state *src_reg)
3365 {
3366 	if (src_reg->type != SCALAR_VALUE)
3367 		return;
3368 	/*
3369 	 * The verifier is processing rX = rY insn and
3370 	 * rY->id has special linked register already.
3371 	 * Cleared it, since multiple rX += const are not supported.
3372 	 */
3373 	if (src_reg->id & BPF_ADD_CONST)
3374 		clear_scalar_id(src_reg);
3375 	/*
3376 	 * Ensure that src_reg has a valid ID that will be copied to
3377 	 * dst_reg and then will be used by sync_linked_regs() to
3378 	 * propagate min/max range.
3379 	 */
3380 	if (!src_reg->id && !tnum_is_const(src_reg->var_off))
3381 		src_reg->id = ++env->id_gen;
3382 }
3383 
3384 static void save_register_state(struct bpf_verifier_env *env,
3385 				struct bpf_func_state *state,
3386 				int spi, struct bpf_reg_state *reg,
3387 				int size)
3388 {
3389 	int i;
3390 
3391 	state->stack[spi].spilled_ptr = *reg;
3392 
3393 	for (i = BPF_REG_SIZE; i > BPF_REG_SIZE - size; i--)
3394 		state->stack[spi].slot_type[i - 1] = STACK_SPILL;
3395 
3396 	/* size < 8 bytes spill */
3397 	for (; i; i--)
3398 		mark_stack_slot_misc(env, &state->stack[spi].slot_type[i - 1]);
3399 }
3400 
3401 static bool is_bpf_st_mem(struct bpf_insn *insn)
3402 {
3403 	return BPF_CLASS(insn->code) == BPF_ST && BPF_MODE(insn->code) == BPF_MEM;
3404 }
3405 
3406 static int get_reg_width(struct bpf_reg_state *reg)
3407 {
3408 	return fls64(reg_umax(reg));
3409 }
3410 
3411 /* See comment for mark_fastcall_pattern_for_call() */
3412 static void check_fastcall_stack_contract(struct bpf_verifier_env *env,
3413 					  struct bpf_func_state *state, int insn_idx, int off)
3414 {
3415 	struct bpf_subprog_info *subprog = &env->subprog_info[state->subprogno];
3416 	struct bpf_insn_aux_data *aux = env->insn_aux_data;
3417 	int i;
3418 
3419 	if (subprog->fastcall_stack_off <= off || aux[insn_idx].fastcall_pattern)
3420 		return;
3421 	/* access to the region [max_stack_depth .. fastcall_stack_off)
3422 	 * from something that is not a part of the fastcall pattern,
3423 	 * disable fastcall rewrites for current subprogram by setting
3424 	 * fastcall_stack_off to a value smaller than any possible offset.
3425 	 */
3426 	subprog->fastcall_stack_off = S16_MIN;
3427 	/* reset fastcall aux flags within subprogram,
3428 	 * happens at most once per subprogram
3429 	 */
3430 	for (i = subprog->start; i < (subprog + 1)->start; ++i) {
3431 		aux[i].fastcall_spills_num = 0;
3432 		aux[i].fastcall_pattern = 0;
3433 	}
3434 }
3435 
3436 static void scrub_special_slot(struct bpf_func_state *state, int spi)
3437 {
3438 	int i;
3439 
3440 	/* regular write of data into stack destroys any spilled ptr */
3441 	state->stack[spi].spilled_ptr.type = NOT_INIT;
3442 	/* Mark slots as STACK_MISC if they belonged to spilled ptr/dynptr/iter. */
3443 	if (is_stack_slot_special(&state->stack[spi]))
3444 		for (i = 0; i < BPF_REG_SIZE; i++)
3445 			scrub_spilled_slot(&state->stack[spi].slot_type[i]);
3446 }
3447 
/* check_stack_{read,write}_fixed_off functions track spill/fill of registers,
 * stack boundary and alignment are checked in check_mem_access()
 *
 * Track a write of 'size' bytes at fixed stack offset 'off' into the frame
 * 'state'. 'value_regno' is the register of the current frame that is being
 * stored, or negative when there is no source register. 'insn_idx' is the
 * index of the store instruction.
 *
 * Returns 0 on success, -EACCES or -EINVAL when the write is rejected, or the
 * error reported by one of the helpers called along the way.
 */
static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
				       /* stack frame we're writing to */
				       struct bpf_func_state *state,
				       int off, int size, int value_regno,
				       int insn_idx)
{
	struct bpf_func_state *cur; /* state of the current function */
	int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
	struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
	struct bpf_reg_state *reg = NULL;
	/* cleared below when the write is not recorded as a register spill */
	int insn_flags = INSN_F_STACK_ACCESS;
	/* slot and frame handed to bpf_push_jmp_history() at the end */
	int hist_spi = spi, hist_frame = state->frameno;

	/* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
	 * so it's aligned access and [off, off + size) are within stack limits
	 */
	if (!env->allow_ptr_leaks &&
	    bpf_is_spilled_reg(&state->stack[spi]) &&
	    !bpf_is_spilled_scalar_reg(&state->stack[spi]) &&
	    size != BPF_REG_SIZE) {
		verbose(env, "attempt to corrupt spilled pointer on stack\n");
		return -EACCES;
	}

	cur = env->cur_state->frame[env->cur_state->curframe];
	if (value_regno >= 0)
		reg = &cur->regs[value_regno];
	if (!env->bypass_spec_v4) {
		/* Flag the insn with nospec_result when a spillable pointer is
		 * stored, or when any of the first 'size' slot_type entries is
		 * neither STACK_MISC nor STACK_ZERO.
		 */
		bool sanitize = reg && is_spillable_regtype(reg->type);

		for (i = 0; i < size; i++) {
			u8 type = state->stack[spi].slot_type[i];

			if (type != STACK_MISC && type != STACK_ZERO) {
				sanitize = true;
				break;
			}
		}

		if (sanitize)
			env->insn_aux_data[insn_idx].nospec_result = true;
	}

	err = destroy_if_dynptr_stack_slot(env, state, spi);
	if (err)
		return err;

	check_fastcall_stack_contract(env, state, insn_idx, off);
	mark_stack_slot_scratched(env, spi);
	if (reg && !(off % BPF_REG_SIZE) && reg->type == SCALAR_VALUE && env->bpf_capable) {
		/* Scalar register stored at a slot-aligned offset: keep its
		 * full state in the slot.
		 */
		bool reg_value_fits;

		reg_value_fits = get_reg_width(reg) <= BITS_PER_BYTE * size;
		/* Make sure that reg had an ID to build a relation on spill. */
		if (reg_value_fits)
			assign_scalar_id_before_mov(env, reg);
		save_register_state(env, state, spi, reg, size);
		/* Break the relation on a narrowing spill. */
		if (!reg_value_fits)
			state->stack[spi].spilled_ptr.id = 0;
	} else if (!reg && !(off % BPF_REG_SIZE) && is_bpf_st_mem(insn) &&
		   env->bpf_capable) {
		/* Immediate store at a slot-aligned offset: build a known
		 * scalar from insn->imm in a scratch register and record it
		 * like a spill.
		 */
		struct bpf_reg_state *tmp_reg = &env->fake_reg[0];

		memset(tmp_reg, 0, sizeof(*tmp_reg));
		__mark_reg_known(tmp_reg, insn->imm);
		tmp_reg->type = SCALAR_VALUE;
		save_register_state(env, state, spi, tmp_reg, size);
	} else if (reg && is_spillable_regtype(reg->type)) {
		/* register containing pointer is being spilled into stack */
		if (size != BPF_REG_SIZE) {
			verbose_linfo(env, insn_idx, "; ");
			verbose(env, "invalid size of register spill\n");
			return -EACCES;
		}
		if (state != cur && reg->type == PTR_TO_STACK) {
			verbose(env, "cannot spill pointers to stack into stack frame of the caller\n");
			return -EINVAL;
		}
		save_register_state(env, state, spi, reg, size);
	} else {
		/* Plain data write: the slot no longer holds a tracked register. */
		u8 type = STACK_MISC;

		scrub_special_slot(state, spi);

		/* when we zero initialize stack slots mark them as such */
		if ((reg && bpf_register_is_null(reg)) ||
		    (!reg && is_bpf_st_mem(insn) && insn->imm == 0)) {
			/* STACK_ZERO case happened because register spill
			 * wasn't properly aligned at the stack slot boundary,
			 * so it's not a register spill anymore; force
			 * originating register to be precise to make
			 * STACK_ZERO correct for subsequent states
			 */
			err = mark_chain_precision(env, value_regno);
			if (err)
				return err;
			type = STACK_ZERO;
		}

		/* Mark slots affected by this stack write. */
		for (i = 0; i < size; i++)
			state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] = type;
		insn_flags = 0; /* not a register spill */
	}

	/* Only writes recorded as register spills are added to the jmp history. */
	if (insn_flags)
		return bpf_push_jmp_history(env, env->cur_state, insn_flags,
					    hist_spi, hist_frame, 0);
	return 0;
}
3562 
3563 /* Write the stack: 'stack[ptr_reg + off] = value_regno'. 'ptr_reg' is
3564  * known to contain a variable offset.
3565  * This function checks whether the write is permitted and conservatively
3566  * tracks the effects of the write, considering that each stack slot in the
3567  * dynamic range is potentially written to.
3568  *
3569  * 'value_regno' can be -1, meaning that an unknown value is being written to
3570  * the stack.
3571  *
3572  * Spilled pointers in range are not marked as written because we don't know
3573  * what's going to be actually written. This means that read propagation for
3574  * future reads cannot be terminated by this write.
3575  *
3576  * For privileged programs, uninitialized stack slots are considered
3577  * initialized by this write (even though we don't know exactly what offsets
3578  * are going to be written to). The idea is that we don't want the verifier to
3579  * reject future reads that access slots written to through variable offsets.
3580  */
3581 static int check_stack_write_var_off(struct bpf_verifier_env *env,
3582 				     /* func where register points to */
3583 				     struct bpf_func_state *state,
3584 				     struct bpf_reg_state *ptr_reg, int off, int size,
3585 				     int value_regno, int insn_idx)
3586 {
3587 	struct bpf_func_state *cur; /* state of the current function */
3588 	int min_off, max_off;
3589 	int i, err;
3590 	struct bpf_reg_state *value_reg = NULL;
3591 	struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
3592 	bool writing_zero = false;
3593 	/* set if the fact that we're writing a zero is used to let any
3594 	 * stack slots remain STACK_ZERO
3595 	 */
3596 	bool zero_used = false;
3597 
3598 	cur = env->cur_state->frame[env->cur_state->curframe];
3599 	min_off = reg_smin(ptr_reg) + off;
3600 	max_off = reg_smax(ptr_reg) + off + size;
3601 	if (value_regno >= 0)
3602 		value_reg = &cur->regs[value_regno];
3603 	if ((value_reg && bpf_register_is_null(value_reg)) ||
3604 	    (!value_reg && is_bpf_st_mem(insn) && insn->imm == 0))
3605 		writing_zero = true;
3606 
3607 	for (i = min_off; i < max_off; i++) {
3608 		int spi;
3609 
3610 		spi = bpf_get_spi(i);
3611 		err = destroy_if_dynptr_stack_slot(env, state, spi);
3612 		if (err)
3613 			return err;
3614 	}
3615 
3616 	check_fastcall_stack_contract(env, state, insn_idx, min_off);
3617 	/* Variable offset writes destroy any spilled pointers in range. */
3618 	for (i = min_off; i < max_off; i++) {
3619 		u8 new_type, *stype;
3620 		int slot, spi;
3621 
3622 		slot = -i - 1;
3623 		spi = slot / BPF_REG_SIZE;
3624 		stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
3625 		mark_stack_slot_scratched(env, spi);
3626 
3627 		if (!env->allow_ptr_leaks && *stype != STACK_MISC && *stype != STACK_ZERO) {
3628 			/* Reject the write if range we may write to has not
3629 			 * been initialized beforehand. If we didn't reject
3630 			 * here, the ptr status would be erased below (even
3631 			 * though not all slots are actually overwritten),
3632 			 * possibly opening the door to leaks.
3633 			 *
3634 			 * We do however catch STACK_INVALID case below, and
3635 			 * only allow reading possibly uninitialized memory
3636 			 * later for CAP_PERFMON, as the write may not happen to
3637 			 * that slot.
3638 			 */
3639 			verbose(env, "spilled ptr in range of var-offset stack write; insn %d, ptr off: %d",
3640 				insn_idx, i);
3641 			return -EINVAL;
3642 		}
3643 
3644 		/* If writing_zero and the spi slot contains a spill of value 0,
3645 		 * maintain the spill type.
3646 		 */
3647 		if (writing_zero && *stype == STACK_SPILL &&
3648 		    bpf_is_spilled_scalar_reg(&state->stack[spi])) {
3649 			struct bpf_reg_state *spill_reg = &state->stack[spi].spilled_ptr;
3650 
3651 			if (tnum_is_const(spill_reg->var_off) && spill_reg->var_off.value == 0) {
3652 				zero_used = true;
3653 				continue;
3654 			}
3655 		}
3656 
3657 		/*
3658 		 * Scrub slots if variable-offset stack write goes over spilled pointers.
3659 		 * Otherwise bpf_is_spilled_reg() may == true && spilled_ptr.type == NOT_INIT
3660 		 * and valid program is rejected by check_stack_read_fixed_off()
3661 		 * with obscure "invalid size of register fill" message.
3662 		 */
3663 		scrub_special_slot(state, spi);
3664 
3665 		/* Update the slot type. */
3666 		new_type = STACK_MISC;
3667 		if (writing_zero && *stype == STACK_ZERO) {
3668 			new_type = STACK_ZERO;
3669 			zero_used = true;
3670 		}
3671 		/* If the slot is STACK_INVALID, we check whether it's OK to
3672 		 * pretend that it will be initialized by this write. The slot
3673 		 * might not actually be written to, and so if we mark it as
3674 		 * initialized future reads might leak uninitialized memory.
3675 		 * For privileged programs, we will accept such reads to slots
3676 		 * that may or may not be written because, if we're reject
3677 		 * them, the error would be too confusing.
3678 		 * Conservatively, treat STACK_POISON in a similar way.
3679 		 */
3680 		if ((*stype == STACK_INVALID || *stype == STACK_POISON) &&
3681 		    !env->allow_uninit_stack) {
3682 			verbose(env, "uninit stack in range of var-offset write prohibited for !root; insn %d, off: %d",
3683 					insn_idx, i);
3684 			return -EINVAL;
3685 		}
3686 		*stype = new_type;
3687 	}
3688 	if (zero_used) {
3689 		/* backtracking doesn't work for STACK_ZERO yet. */
3690 		err = mark_chain_precision(env, value_regno);
3691 		if (err)
3692 			return err;
3693 	}
3694 	return 0;
3695 }
3696 
3697 /* When register 'dst_regno' is assigned some values from stack[min_off,
3698  * max_off), we set the register's type according to the types of the
3699  * respective stack slots. If all the stack values are known to be zeros, then
3700  * so is the destination reg. Otherwise, the register is considered to be
3701  * SCALAR. This function does not deal with register filling; the caller must
3702  * ensure that all spilled registers in the stack range have been marked as
3703  * read.
3704  */
3705 static void mark_reg_stack_read(struct bpf_verifier_env *env,
3706 				/* func where src register points to */
3707 				struct bpf_func_state *ptr_state,
3708 				int min_off, int max_off, int dst_regno)
3709 {
3710 	struct bpf_verifier_state *vstate = env->cur_state;
3711 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
3712 	int i, slot, spi;
3713 	u8 *stype;
3714 	int zeros = 0;
3715 
3716 	for (i = min_off; i < max_off; i++) {
3717 		slot = -i - 1;
3718 		spi = slot / BPF_REG_SIZE;
3719 		mark_stack_slot_scratched(env, spi);
3720 		stype = ptr_state->stack[spi].slot_type;
3721 		if (stype[slot % BPF_REG_SIZE] != STACK_ZERO)
3722 			break;
3723 		zeros++;
3724 	}
3725 	if (zeros == max_off - min_off) {
3726 		/* Any access_size read into register is zero extended,
3727 		 * so the whole register == const_zero.
3728 		 */
3729 		__mark_reg_const_zero(env, &state->regs[dst_regno]);
3730 	} else {
3731 		/* have read misc data from the stack */
3732 		mark_reg_unknown(env, state->regs, dst_regno);
3733 	}
3734 }
3735 
/* Read the stack at 'off' and put the results into the register indicated by
 * 'dst_regno'. It handles reg filling if the addressed stack slot is a
 * spilled reg.
 *
 * 'dst_regno' can be -1, meaning that the read value is not going to a
 * register.
 *
 * The access is assumed to be within the current stack bounds.
 *
 * 'reg_state' is the frame whose stack is read and 'size' is the number of
 * bytes read. Returns 0 on success, -EACCES when the read is rejected, or
 * the result of bpf_push_jmp_history() when the read is recorded as a
 * register fill.
 */
static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
				      /* func where src register points to */
				      struct bpf_func_state *reg_state,
				      int off, int size, int dst_regno)
{
	struct bpf_verifier_state *vstate = env->cur_state;
	/* frame of the function doing the read; it owns dst_regno */
	struct bpf_func_state *state = vstate->frame[vstate->curframe];
	int i, slot = -off - 1, spi = slot / BPF_REG_SIZE;
	struct bpf_reg_state *reg;
	u8 *stype, type;
	/* cleared below when the read is not recorded as a register fill */
	int insn_flags = INSN_F_STACK_ACCESS;
	int hist_spi = spi, hist_frame = reg_state->frameno;

	stype = reg_state->stack[spi].slot_type;
	reg = &reg_state->stack[spi].spilled_ptr;

	mark_stack_slot_scratched(env, spi);
	check_fastcall_stack_contract(env, state, env->insn_idx, off);

	if (bpf_is_spilled_reg(&reg_state->stack[spi])) {
		u8 spill_size = 1;

		/* Count the STACK_SPILL bytes, walking down from the top of the slot. */
		for (i = BPF_REG_SIZE - 1; i > 0 && stype[i - 1] == STACK_SPILL; i--)
			spill_size++;

		if (size != BPF_REG_SIZE || spill_size != BPF_REG_SIZE) {
			/* Partial read and/or partial spill: only scalars qualify. */
			if (reg->type != SCALAR_VALUE) {
				verbose_linfo(env, env->insn_idx, "; ");
				verbose(env, "invalid size of register fill\n");
				return -EACCES;
			}

			if (dst_regno < 0)
				return 0;

			if (size <= spill_size &&
			    bpf_stack_narrow_access_ok(off, size, spill_size)) {
				/* The earlier check_reg_arg() has decided the
				 * subreg_def for this insn.  Save it first.
				 */
				s32 subreg_def = state->regs[dst_regno].subreg_def;

				if (env->bpf_capable && size == 4 && spill_size == 4 &&
				    get_reg_width(reg) <= 32)
					/* Ensure stack slot has an ID to build a relation
					 * with the destination register on fill.
					 */
					assign_scalar_id_before_mov(env, reg);
				state->regs[dst_regno] = *reg;
				state->regs[dst_regno].subreg_def = subreg_def;

				/* Break the relation on a narrowing fill.
				 * coerce_reg_to_size will adjust the boundaries.
				 */
				if (get_reg_width(reg) > size * BITS_PER_BYTE)
					clear_scalar_id(&state->regs[dst_regno]);
			} else {
				/* The read does not line up with the spill:
				 * classify the accessed bytes one by one.
				 */
				int spill_cnt = 0, zero_cnt = 0;

				for (i = 0; i < size; i++) {
					type = stype[(slot - i) % BPF_REG_SIZE];
					if (type == STACK_SPILL) {
						spill_cnt++;
						continue;
					}
					if (type == STACK_MISC)
						continue;
					if (type == STACK_ZERO) {
						zero_cnt++;
						continue;
					}
					if (type == STACK_INVALID && env->allow_uninit_stack)
						continue;
					if (type == STACK_POISON) {
						verbose(env, "reading from stack off %d+%d size %d, slot poisoned by dead code elimination\n",
							off, i, size);
					} else {
						verbose(env, "invalid read from stack off %d+%d size %d\n",
							off, i, size);
					}
					return -EACCES;
				}

				if (spill_cnt == size &&
				    tnum_is_const(reg->var_off) && reg->var_off.value == 0) {
					__mark_reg_const_zero(env, &state->regs[dst_regno]);
					/* this IS register fill, so keep insn_flags */
				} else if (zero_cnt == size) {
					/* similarly to mark_reg_stack_read(), preserve zeroes */
					__mark_reg_const_zero(env, &state->regs[dst_regno]);
					insn_flags = 0; /* not restoring original register state */
				} else {
					mark_reg_unknown(env, state->regs, dst_regno);
					insn_flags = 0; /* not restoring original register state */
				}
			}
		} else if (dst_regno >= 0) {
			/* restore register state from stack */
			if (env->bpf_capable)
				/* Ensure stack slot has an ID to build a relation
				 * with the destination register on fill.
				 */
				assign_scalar_id_before_mov(env, reg);
			state->regs[dst_regno] = *reg;
			/* NOTE(review): an earlier comment at this spot spoke of
			 * marking the register as written, but no statement
			 * follows the copy above; that note looks stale - confirm.
			 */
		} else if (__is_pointer_value(env->allow_ptr_leaks, reg)) {
			/* If dst_regno==-1, the caller is asking us whether
			 * it is acceptable to use this value as a SCALAR_VALUE
			 * (e.g. for XADD).
			 * We must not allow unprivileged callers to do that
			 * with spilled pointers.
			 */
			verbose(env, "leaking pointer from stack off %d\n",
				off);
			return -EACCES;
		}
	} else {
		/* No spilled register in this slot: every accessed byte must be
		 * readable data.
		 */
		for (i = 0; i < size; i++) {
			type = stype[(slot - i) % BPF_REG_SIZE];
			if (type == STACK_MISC)
				continue;
			if (type == STACK_ZERO)
				continue;
			if (type == STACK_INVALID && env->allow_uninit_stack)
				continue;
			if (type == STACK_POISON) {
				verbose(env, "reading from stack off %d+%d size %d, slot poisoned by dead code elimination\n",
					off, i, size);
			} else {
				verbose(env, "invalid read from stack off %d+%d size %d\n",
					off, i, size);
			}
			return -EACCES;
		}
		if (dst_regno >= 0)
			mark_reg_stack_read(env, reg_state, off, off + size, dst_regno);
		insn_flags = 0; /* we are not restoring spilled register */
	}
	/* Only reads recorded as register fills are added to the jmp history. */
	if (insn_flags)
		return bpf_push_jmp_history(env, env->cur_state, insn_flags,
					    hist_spi, hist_frame, 0);
	return 0;
}
3891 
/* Identifies what performs a memory access that is being checked. */
enum bpf_access_src {
	/* the access is performed by an instruction */
	ACCESS_DIRECT = 1,
	/* the access is performed by a helper */
	ACCESS_HELPER = 2,
};
3896 
/* Forward declaration so that check_stack_read_var_off() below can call this
 * function.
 */
static int check_stack_range_initialized(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
					 argno_t argno, int off, int access_size,
					 bool zero_size_allowed,
					 enum bpf_access_type type,
					 struct bpf_call_arg_meta *meta);
3902 
3903 static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno)
3904 {
3905 	return cur_regs(env) + regno;
3906 }
3907 
3908 /* Read the stack at 'reg + off' and put the result into the register
3909  * 'dst_regno'.
3910  * 'off' includes the pointer register's fixed offset(i.e. 'reg->off'),
3911  * but not its variable offset.
3912  * 'size' is assumed to be <= reg size and the access is assumed to be aligned.
3913  *
3914  * As opposed to check_stack_read_fixed_off, this function doesn't deal with
3915  * filling registers (i.e. reads of spilled register cannot be detected when
3916  * the offset is not fixed). We conservatively mark 'dst_regno' as containing
3917  * SCALAR_VALUE. That's why we assert that the 'reg' has a variable
3918  * offset; for a fixed offset check_stack_read_fixed_off should be used
3919  * instead.
3920  */
3921 static int check_stack_read_var_off(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
3922 				    argno_t ptr_argno, int off, int size, int dst_regno)
3923 {
3924 	struct bpf_func_state *ptr_state = bpf_func(env, reg);
3925 	int err;
3926 	int min_off, max_off;
3927 
3928 	/* Note that we pass a NULL meta, so raw access will not be permitted.
3929 	 */
3930 	err = check_stack_range_initialized(env, reg, ptr_argno, off, size,
3931 					    false, BPF_READ, NULL);
3932 	if (err)
3933 		return err;
3934 
3935 	min_off = reg_smin(reg) + off;
3936 	max_off = reg_smax(reg) + off;
3937 	mark_reg_stack_read(env, ptr_state, min_off, max_off + size, dst_regno);
3938 	check_fastcall_stack_contract(env, ptr_state, env->insn_idx, min_off);
3939 	return 0;
3940 }
3941 
3942 /* check_stack_read dispatches to check_stack_read_fixed_off or
3943  * check_stack_read_var_off.
3944  *
3945  * The caller must ensure that the offset falls within the allocated stack
3946  * bounds.
3947  *
3948  * 'dst_regno' is a register which will receive the value from the stack. It
3949  * can be -1, meaning that the read value is not going to a register.
3950  */
3951 static int check_stack_read(struct bpf_verifier_env *env,
3952 			    struct bpf_reg_state *reg, argno_t ptr_argno, int off, int size,
3953 			    int dst_regno)
3954 {
3955 	struct bpf_func_state *state = bpf_func(env, reg);
3956 	int err;
3957 	/* Some accesses are only permitted with a static offset. */
3958 	bool var_off = !tnum_is_const(reg->var_off);
3959 
3960 	/* The offset is required to be static when reads don't go to a
3961 	 * register, in order to not leak pointers (see
3962 	 * check_stack_read_fixed_off).
3963 	 */
3964 	if (dst_regno < 0 && var_off) {
3965 		char tn_buf[48];
3966 
3967 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3968 		verbose(env, "variable offset stack pointer cannot be passed into helper function; var_off=%s off=%d size=%d\n",
3969 			tn_buf, off, size);
3970 		return -EACCES;
3971 	}
3972 	/* Variable offset is prohibited for unprivileged mode for simplicity
3973 	 * since it requires corresponding support in Spectre masking for stack
3974 	 * ALU. See also retrieve_ptr_limit(). The check in
3975 	 * check_stack_access_for_ptr_arithmetic() called by
3976 	 * adjust_ptr_min_max_vals() prevents users from creating stack pointers
3977 	 * with variable offsets, therefore no check is required here. Further,
3978 	 * just checking it here would be insufficient as speculative stack
3979 	 * writes could still lead to unsafe speculative behaviour.
3980 	 */
3981 	if (!var_off) {
3982 		off += reg->var_off.value;
3983 		err = check_stack_read_fixed_off(env, state, off, size,
3984 						 dst_regno);
3985 	} else {
3986 		/* Variable offset stack reads need more conservative handling
3987 		 * than fixed offset ones. Note that dst_regno >= 0 on this
3988 		 * branch.
3989 		 */
3990 		err = check_stack_read_var_off(env, reg, ptr_argno, off, size,
3991 					       dst_regno);
3992 	}
3993 	return err;
3994 }
3995 
3996 
3997 /* check_stack_write dispatches to check_stack_write_fixed_off or
3998  * check_stack_write_var_off.
3999  *
4000  * 'reg' is the register used as a pointer into the stack.
4001  * 'value_regno' is the register whose value we're writing to the stack. It can
4002  * be -1, meaning that we're not writing from a register.
4003  *
4004  * The caller must ensure that the offset falls within the maximum stack size.
4005  */
4006 static int check_stack_write(struct bpf_verifier_env *env,
4007 			     struct bpf_reg_state *reg, int off, int size,
4008 			     int value_regno, int insn_idx)
4009 {
4010 	struct bpf_func_state *state = bpf_func(env, reg);
4011 	int err;
4012 
4013 	if (tnum_is_const(reg->var_off)) {
4014 		off += reg->var_off.value;
4015 		err = check_stack_write_fixed_off(env, state, off, size,
4016 						  value_regno, insn_idx);
4017 	} else {
4018 		/* Variable offset stack reads need more conservative handling
4019 		 * than fixed offset ones.
4020 		 */
4021 		err = check_stack_write_var_off(env, state,
4022 						reg, off, size,
4023 						value_regno, insn_idx);
4024 	}
4025 	return err;
4026 }
4027 
4028 /*
4029  * Write a value to the outgoing stack arg area.
4030  * off is a negative offset from r11 (e.g. -8 for arg6, -16 for arg7).
4031  */
4032 static int check_stack_arg_write(struct bpf_verifier_env *env, struct bpf_func_state *state,
4033 				 int off, struct bpf_reg_state *value_reg)
4034 {
4035 	int max_stack_arg_regs = MAX_BPF_FUNC_ARGS - MAX_BPF_FUNC_REG_ARGS;
4036 	struct bpf_subprog_info *subprog = &env->subprog_info[state->subprogno];
4037 	int spi = -off / BPF_REG_SIZE - 1;
4038 	struct bpf_reg_state *arg;
4039 	int err;
4040 
4041 	if (spi >= max_stack_arg_regs) {
4042 		verbose(env, "stack arg write offset %d exceeds max %d stack args\n",
4043 			off, max_stack_arg_regs);
4044 		return -EINVAL;
4045 	}
4046 
4047 	err = grow_stack_arg_slots(env, state, spi + 1);
4048 	if (err)
4049 		return err;
4050 
4051 	/* Track the max outgoing stack arg slot count. */
4052 	if (spi + 1 > subprog->max_out_stack_arg_cnt)
4053 		subprog->max_out_stack_arg_cnt = spi + 1;
4054 
4055 	if (value_reg) {
4056 		state->stack_arg_regs[spi] = *value_reg;
4057 	} else {
4058 		/* BPF_ST: store immediate, treat as scalar */
4059 		arg = &state->stack_arg_regs[spi];
4060 		arg->type = SCALAR_VALUE;
4061 		__mark_reg_known(arg, env->prog->insnsi[env->insn_idx].imm);
4062 	}
4063 	state->no_stack_arg_load = true;
4064 	return bpf_push_jmp_history(env, env->cur_state,
4065 				    INSN_F_STACK_ARG_ACCESS, spi, 0, 0);
4066 }
4067 
4068 /*
4069  * Read a value from the incoming stack arg area.
4070  * off is a positive offset from r11 (e.g. +8 for arg6, +16 for arg7).
4071  */
4072 static int check_stack_arg_read(struct bpf_verifier_env *env, struct bpf_func_state *state,
4073 				int off, int dst_regno)
4074 {
4075 	struct bpf_subprog_info *subprog = &env->subprog_info[state->subprogno];
4076 	struct bpf_verifier_state *vstate = env->cur_state;
4077 	int spi = off / BPF_REG_SIZE - 1;
4078 	struct bpf_func_state *caller, *cur;
4079 	struct bpf_reg_state *arg;
4080 
4081 	if (state->no_stack_arg_load) {
4082 		verbose(env, "r11 load must be before any r11 store or call insn\n");
4083 		return -EINVAL;
4084 	}
4085 
4086 	if (spi + 1 > bpf_in_stack_arg_cnt(subprog)) {
4087 		verbose(env, "invalid read from stack arg off %d depth %d\n",
4088 			off, bpf_in_stack_arg_cnt(subprog) * BPF_REG_SIZE);
4089 		return -EACCES;
4090 	}
4091 
4092 	caller = vstate->frame[vstate->curframe - 1];
4093 	arg = &caller->stack_arg_regs[spi];
4094 	cur = vstate->frame[vstate->curframe];
4095 	cur->regs[dst_regno] = *arg;
4096 	return bpf_push_jmp_history(env, env->cur_state,
4097 				    INSN_F_STACK_ARG_ACCESS, spi, 0, 0);
4098 }
4099 
4100 static int mark_stack_arg_precision(struct bpf_verifier_env *env, int arg_idx)
4101 {
4102 	struct bpf_func_state *caller = cur_func(env);
4103 	int spi = arg_idx - MAX_BPF_FUNC_REG_ARGS;
4104 
4105 	bt_set_frame_stack_arg_slot(&env->bt, caller->frameno, spi);
4106 	return mark_chain_precision_batch(env, env->cur_state);
4107 }
4108 
4109 static int check_outgoing_stack_args(struct bpf_verifier_env *env, struct bpf_func_state *caller,
4110 				     int nargs)
4111 {
4112 	int i, spi;
4113 
4114 	for (i = MAX_BPF_FUNC_REG_ARGS; i < nargs; i++) {
4115 		spi = i - MAX_BPF_FUNC_REG_ARGS;
4116 		if (spi >= caller->out_stack_arg_cnt ||
4117 		    caller->stack_arg_regs[spi].type == NOT_INIT) {
4118 			verbose(env, "callee expects %d args, stack arg%d is not initialized\n",
4119 				nargs, spi + 1);
4120 			return -EFAULT;
4121 		}
4122 	}
4123 
4124 	return 0;
4125 }
4126 
4127 static struct bpf_reg_state *get_func_arg_reg(struct bpf_func_state *caller,
4128 					      struct bpf_reg_state *regs, int arg)
4129 {
4130 	if (arg < MAX_BPF_FUNC_REG_ARGS)
4131 		return &regs[arg + 1];
4132 
4133 	return &caller->stack_arg_regs[arg - MAX_BPF_FUNC_REG_ARGS];
4134 }
4135 
4136 static int check_map_access_type(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
4137 				 int off, int size, enum bpf_access_type type)
4138 {
4139 	struct bpf_map *map = reg->map_ptr;
4140 	u32 cap = bpf_map_flags_to_cap(map);
4141 
4142 	if (type == BPF_WRITE && !(cap & BPF_MAP_CAN_WRITE)) {
4143 		verbose(env, "write into map forbidden, value_size=%d off=%lld size=%d\n",
4144 			map->value_size, reg_smin(reg) + off, size);
4145 		return -EACCES;
4146 	}
4147 
4148 	if (type == BPF_READ && !(cap & BPF_MAP_CAN_READ)) {
4149 		verbose(env, "read from map forbidden, value_size=%d off=%lld size=%d\n",
4150 			map->value_size, reg_smin(reg) + off, size);
4151 		return -EACCES;
4152 	}
4153 
4154 	return 0;
4155 }
4156 
4157 /* check read/write into memory region (e.g., map value, ringbuf sample, etc) */
4158 static int __check_mem_access(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno,
4159 			      int off, int size, u32 mem_size,
4160 			      bool zero_size_allowed)
4161 {
4162 	bool size_ok = size > 0 || (size == 0 && zero_size_allowed);
4163 
4164 	if (off >= 0 && size_ok && (u64)off + size <= mem_size)
4165 		return 0;
4166 
4167 	switch (reg->type) {
4168 	case PTR_TO_MAP_KEY:
4169 		verbose(env, "invalid access to map key, key_size=%d off=%d size=%d\n",
4170 			mem_size, off, size);
4171 		break;
4172 	case PTR_TO_MAP_VALUE:
4173 		verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n",
4174 			mem_size, off, size);
4175 		break;
4176 	case PTR_TO_PACKET:
4177 	case PTR_TO_PACKET_META:
4178 	case PTR_TO_PACKET_END:
4179 		verbose(env, "invalid access to packet, off=%d size=%d, %s(id=%d,off=%d,r=%d)\n",
4180 			off, size, reg_arg_name(env, argno), reg->id, off, mem_size);
4181 		break;
4182 	case PTR_TO_CTX:
4183 		verbose(env, "invalid access to context, ctx_size=%d off=%d size=%d\n",
4184 			mem_size, off, size);
4185 		break;
4186 	case PTR_TO_MEM:
4187 	default:
4188 		verbose(env, "invalid access to memory, mem_size=%u off=%d size=%d\n",
4189 			mem_size, off, size);
4190 	}
4191 
4192 	return -EACCES;
4193 }
4194 
/* Check read/write into a memory region with possible variable offset.
 *
 * The access of 'size' bytes is validated with __check_mem_access() twice:
 * once at reg_smin(reg) + off and once at reg_umax(reg) + off, both against
 * 'mem_size'.
 *
 * Returns 0 if both checks pass; -EACCES if the minimum offset is negative
 * (or does not fit in s32) or the maximum is unbounded; otherwise the error
 * from __check_mem_access().
 */
static int check_mem_region_access(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno,
				   int off, int size, u32 mem_size,
				   bool zero_size_allowed)
{
	int err;

	/* We may have adjusted the register pointing to memory region, so we
	 * need to try adding each of min_value and max_value to off
	 * to make sure our theoretical access will be safe.
	 *
	 * The minimum value is only important with signed
	 * comparisons where we can't assume the floor of a
	 * value is 0.  If we are using signed variables for our
	 * index'es we need to make sure that whatever we use
	 * will have a set floor within our range.
	 */
	/* Reject a negative minimum when it is S64_MIN, when off + smin does
	 * not survive a round trip through s32, or when the sum is negative.
	 */
	if (reg_smin(reg) < 0 &&
	    (reg_smin(reg) == S64_MIN ||
	     (off + reg_smin(reg) != (s64)(s32)(off + reg_smin(reg))) ||
	      reg_smin(reg) + off < 0)) {
		verbose(env, "%s min value is negative, either use unsigned index or do a if (index >=0) check.\n",
			reg_arg_name(env, argno));
		return -EACCES;
	}
	/* Lower end of the possible access range. */
	err = __check_mem_access(env, reg, argno, reg_smin(reg) + off, size,
				 mem_size, zero_size_allowed);
	if (err) {
		verbose(env, "%s min value is outside of the allowed memory range\n",
			reg_arg_name(env, argno));
		return err;
	}

	/* If we haven't set a max value then we need to bail since we can't be
	 * sure we won't do bad things.
	 * If reg_umax(reg) + off could overflow, treat that as unbounded too.
	 */
	if (reg_umax(reg) >= BPF_MAX_VAR_OFF) {
		verbose(env, "%s unbounded memory access, make sure to bounds check any such access\n",
			reg_arg_name(env, argno));
		return -EACCES;
	}
	/* Upper end of the possible access range. */
	err = __check_mem_access(env, reg, argno, reg_umax(reg) + off, size,
				 mem_size, zero_size_allowed);
	if (err) {
		verbose(env, "%s max value is outside of the allowed memory range\n",
			reg_arg_name(env, argno));
		return err;
	}

	return 0;
}
4247 
4248 static int __check_ptr_off_reg(struct bpf_verifier_env *env,
4249 			       const struct bpf_reg_state *reg, argno_t argno,
4250 			       bool fixed_off_ok)
4251 {
4252 	/* Access to this pointer-typed register or passing it to a helper
4253 	 * is only allowed in its original, unmodified form.
4254 	 */
4255 
4256 	if (!tnum_is_const(reg->var_off)) {
4257 		char tn_buf[48];
4258 
4259 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4260 		verbose(env, "variable %s access var_off=%s disallowed\n",
4261 			reg_type_str(env, reg->type), tn_buf);
4262 		return -EACCES;
4263 	}
4264 
4265 	if (reg_smin(reg) < 0) {
4266 		verbose(env, "negative offset %s ptr %s off=%lld disallowed\n",
4267 			reg_type_str(env, reg->type), reg_arg_name(env, argno), reg->var_off.value);
4268 		return -EACCES;
4269 	}
4270 
4271 	if (!fixed_off_ok && reg->var_off.value != 0) {
4272 		verbose(env, "dereference of modified %s ptr %s off=%lld disallowed\n",
4273 			reg_type_str(env, reg->type), reg_arg_name(env, argno), reg->var_off.value);
4274 		return -EACCES;
4275 	}
4276 
4277 	return 0;
4278 }
4279 
4280 static int check_ptr_off_reg(struct bpf_verifier_env *env,
4281 		             const struct bpf_reg_state *reg, int regno)
4282 {
4283 	return __check_ptr_off_reg(env, reg, argno_from_reg(regno), false);
4284 }
4285 
/* Check whether the pointer held in 'reg' (register number 'regno', used for
 * messages and offset checks) may be stored into the kptr field 'kptr_field'.
 *
 * Returns 0 on a match, -EACCES if __check_ptr_off_reg() rejects the
 * register's offset, and -EINVAL (after logging the actual and expected
 * types) on a type mismatch.
 */
static int map_kptr_match_type(struct bpf_verifier_env *env,
			       struct btf_field *kptr_field,
			       struct bpf_reg_state *reg, u32 regno)
{
	const char *targ_name = btf_type_name(kptr_field->kptr.btf, kptr_field->kptr.btf_id);
	int perm_flags;
	/* Stays empty if we bail out before the register's BTF type is looked up. */
	const char *reg_name = "";

	if (base_type(reg->type) != PTR_TO_BTF_ID)
		goto bad_type;

	/* Build the set of type flags the register is allowed to carry. */
	if (btf_is_kernel(reg->btf)) {
		perm_flags = PTR_MAYBE_NULL | PTR_TRUSTED | MEM_RCU;

		/* Only unreferenced case accepts untrusted pointers */
		if (kptr_field->type == BPF_KPTR_UNREF)
			perm_flags |= PTR_UNTRUSTED;
	} else {
		perm_flags = PTR_MAYBE_NULL | MEM_ALLOC;
		if (kptr_field->type == BPF_KPTR_PERCPU)
			perm_flags |= MEM_PERCPU;
	}

	/* Any flag outside the permitted set is a mismatch. */
	if (type_flag(reg->type) & ~perm_flags)
		goto bad_type;

	/* We need to verify reg->type and reg->btf, before accessing reg->btf */
	reg_name = btf_type_name(reg->btf, reg->btf_id);

	/* For ref_ptr case, release function check should ensure we get one
	 * referenced PTR_TO_BTF_ID, and that its fixed offset is 0. For the
	 * normal store of unreferenced kptr, we must ensure var_off is zero.
	 * Since ref_ptr cannot be accessed directly by BPF insns, check for
	 * reg->id is not needed here.
	 */
	if (__check_ptr_off_reg(env, reg, argno_from_reg(regno), true))
		return -EACCES;

	/* A full type match is needed, as BTF can be vmlinux, module or prog BTF, and
	 * we also need to take into account the reg->var_off.
	 *
	 * We want to support cases like:
	 *
	 * struct foo {
	 *         struct bar br;
	 *         struct baz bz;
	 * };
	 *
	 * struct foo *v;
	 * v = func();	      // PTR_TO_BTF_ID
	 * val->foo = v;      // reg->var_off is zero, btf and btf_id match type
	 * val->bar = &v->br; // reg->var_off is still zero, but we need to retry with
	 *                    // first member type of struct after comparison fails
	 * val->baz = &v->bz; // reg->var_off is non-zero, so struct needs to be walked
	 *                    // to match type
	 *
	 * In the kptr_ref case, check_func_arg_reg_off already ensures reg->var_off
	 * is zero. We must also ensure that btf_struct_ids_match does not walk
	 * the struct to match type against first member of struct, i.e. reject
	 * second case from above. Hence, when type is BPF_KPTR_REF, we set
	 * strict mode to true for type match.
	 */
	if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->var_off.value,
				  kptr_field->kptr.btf, kptr_field->kptr.btf_id,
				  kptr_field->type != BPF_KPTR_UNREF))
		goto bad_type;
	return 0;
bad_type:
	/* Log the register's type and the type(s) the field accepts. */
	verbose(env, "invalid kptr access, R%d type=%s%s ", regno,
		reg_type_str(env, reg->type), reg_name);
	verbose(env, "expected=%s%s", reg_type_str(env, PTR_TO_BTF_ID), targ_name);
	if (kptr_field->type == BPF_KPTR_UNREF)
		verbose(env, " or %s%s\n", reg_type_str(env, PTR_TO_BTF_ID | PTR_UNTRUSTED),
			targ_name);
	else
		verbose(env, "\n");
	return -EINVAL;
}
4364 
4365 static bool in_sleepable(struct bpf_verifier_env *env)
4366 {
4367 	return env->cur_state->in_sleepable;
4368 }
4369 
4370 /* The non-sleepable programs and sleepable programs with explicit bpf_rcu_read_lock()
4371  * can dereference RCU protected pointers and result is PTR_TRUSTED.
4372  */
4373 static bool in_rcu_cs(struct bpf_verifier_env *env)
4374 {
4375 	return env->cur_state->active_rcu_locks ||
4376 	       env->cur_state->active_locks ||
4377 	       !in_sleepable(env);
4378 }
4379 
/* Set of kernel struct types that rcu_protected_object() reports as RCU
 * protected. Most entries are only included when the CONFIG option guarding
 * them is enabled; task_struct is always included.
 *
 * Once GCC supports btf_type_tag the following mechanism will be replaced
 * with tag check.
 */
BTF_SET_START(rcu_protected_types)
#ifdef CONFIG_NET
BTF_ID(struct, prog_test_ref_kfunc)
#endif
#ifdef CONFIG_CGROUPS
BTF_ID(struct, cgroup)
#endif
#ifdef CONFIG_BPF_JIT
BTF_ID(struct, bpf_cpumask)
#endif
BTF_ID(struct, task_struct)
#ifdef CONFIG_CRYPTO
BTF_ID(struct, bpf_crypto_ctx)
#endif
BTF_SET_END(rcu_protected_types)
4396 
4397 static bool rcu_protected_object(const struct btf *btf, u32 btf_id)
4398 {
4399 	if (!btf_is_kernel(btf))
4400 		return true;
4401 	return btf_id_set_contains(&rcu_protected_types, btf_id);
4402 }
4403 
4404 static struct btf_record *kptr_pointee_btf_record(struct btf_field *kptr_field)
4405 {
4406 	struct btf_struct_meta *meta;
4407 
4408 	if (btf_is_kernel(kptr_field->kptr.btf))
4409 		return NULL;
4410 
4411 	meta = btf_find_struct_meta(kptr_field->kptr.btf,
4412 				    kptr_field->kptr.btf_id);
4413 
4414 	return meta ? meta->record : NULL;
4415 }
4416 
4417 static bool rcu_safe_kptr(const struct btf_field *field)
4418 {
4419 	const struct btf_field_kptr *kptr = &field->kptr;
4420 
4421 	return field->type == BPF_KPTR_PERCPU ||
4422 	       (field->type == BPF_KPTR_REF && rcu_protected_object(kptr->btf, kptr->btf_id));
4423 }
4424 
4425 static u32 btf_ld_kptr_type(struct bpf_verifier_env *env, struct btf_field *kptr_field)
4426 {
4427 	struct btf_record *rec;
4428 	u32 ret;
4429 
4430 	ret = PTR_MAYBE_NULL;
4431 	if (rcu_safe_kptr(kptr_field) && in_rcu_cs(env)) {
4432 		ret |= MEM_RCU;
4433 		if (kptr_field->type == BPF_KPTR_PERCPU)
4434 			ret |= MEM_PERCPU;
4435 		else if (!btf_is_kernel(kptr_field->kptr.btf))
4436 			ret |= MEM_ALLOC;
4437 
4438 		rec = kptr_pointee_btf_record(kptr_field);
4439 		if (rec && btf_record_has_field(rec, BPF_GRAPH_NODE))
4440 			ret |= NON_OWN_REF;
4441 	} else {
4442 		ret |= PTR_UNTRUSTED;
4443 	}
4444 
4445 	return ret;
4446 }
4447 
4448 static int mark_uptr_ld_reg(struct bpf_verifier_env *env, u32 regno,
4449 			    struct btf_field *field)
4450 {
4451 	struct bpf_reg_state *reg;
4452 	const struct btf_type *t;
4453 
4454 	t = btf_type_by_id(field->kptr.btf, field->kptr.btf_id);
4455 	mark_reg_known_zero(env, cur_regs(env), regno);
4456 	reg = reg_state(env, regno);
4457 	reg->type = PTR_TO_MEM | PTR_MAYBE_NULL;
4458 	reg->mem_size = t->size;
4459 	reg->id = ++env->id_gen;
4460 
4461 	return 0;
4462 }
4463 
4464 static int check_map_kptr_access(struct bpf_verifier_env *env,
4465 				 int value_regno, int insn_idx,
4466 				 struct btf_field *kptr_field)
4467 {
4468 	struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
4469 	int class = BPF_CLASS(insn->code);
4470 	struct bpf_reg_state *val_reg;
4471 	int ret;
4472 
4473 	/* Things we already checked for in check_map_access and caller:
4474 	 *  - Reject cases where variable offset may touch kptr
4475 	 *  - size of access (must be BPF_DW)
4476 	 *  - tnum_is_const(reg->var_off)
4477 	 *  - kptr_field->offset == off + reg->var_off.value
4478 	 */
4479 	/* Only BPF_[LDX,STX,ST] | BPF_MEM | BPF_DW is supported */
4480 	if (BPF_MODE(insn->code) != BPF_MEM) {
4481 		verbose(env, "kptr in map can only be accessed using BPF_MEM instruction mode\n");
4482 		return -EACCES;
4483 	}
4484 
4485 	/* We only allow loading referenced kptr, since it will be marked as
4486 	 * untrusted, similar to unreferenced kptr.
4487 	 */
4488 	if (class != BPF_LDX &&
4489 	    (kptr_field->type == BPF_KPTR_REF || kptr_field->type == BPF_KPTR_PERCPU)) {
4490 		verbose(env, "store to referenced kptr disallowed\n");
4491 		return -EACCES;
4492 	}
4493 	if (class != BPF_LDX && kptr_field->type == BPF_UPTR) {
4494 		verbose(env, "store to uptr disallowed\n");
4495 		return -EACCES;
4496 	}
4497 
4498 	if (class == BPF_LDX) {
4499 		if (kptr_field->type == BPF_UPTR)
4500 			return mark_uptr_ld_reg(env, value_regno, kptr_field);
4501 
4502 		/* We can simply mark the value_regno receiving the pointer
4503 		 * value from map as PTR_TO_BTF_ID, with the correct type.
4504 		 */
4505 		ret = mark_btf_ld_reg(env, cur_regs(env), value_regno, PTR_TO_BTF_ID,
4506 				      kptr_field->kptr.btf, kptr_field->kptr.btf_id,
4507 				      btf_ld_kptr_type(env, kptr_field));
4508 		if (ret < 0)
4509 			return ret;
4510 	} else if (class == BPF_STX) {
4511 		val_reg = reg_state(env, value_regno);
4512 		if (!bpf_register_is_null(val_reg) &&
4513 		    map_kptr_match_type(env, kptr_field, val_reg, value_regno))
4514 			return -EACCES;
4515 	} else if (class == BPF_ST) {
4516 		if (insn->imm) {
4517 			verbose(env, "BPF_ST imm must be 0 when storing to kptr at off=%u\n",
4518 				kptr_field->offset);
4519 			return -EACCES;
4520 		}
4521 	} else {
4522 		verbose(env, "kptr in map can only be accessed using BPF_LDX/BPF_STX/BPF_ST\n");
4523 		return -EACCES;
4524 	}
4525 	return 0;
4526 }
4527 
4528 /*
4529  * Return the size of the memory region accessible from a pointer to map value.
4530  * For INSN_ARRAY maps whole bpf_insn_array->ips array is accessible.
4531  */
4532 static u32 map_mem_size(const struct bpf_map *map)
4533 {
4534 	if (map->map_type == BPF_MAP_TYPE_INSN_ARRAY)
4535 		return map->max_entries * sizeof(long);
4536 
4537 	return map->value_size;
4538 }
4539 
4540 /* check read/write into a map element with possible variable offset */
4541 static int check_map_access(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno,
4542 			    int off, int size, bool zero_size_allowed,
4543 			    enum bpf_access_src src)
4544 {
4545 	struct bpf_map *map = reg->map_ptr;
4546 	u32 mem_size = map_mem_size(map);
4547 	struct btf_record *rec;
4548 	int err, i;
4549 
4550 	err = check_mem_region_access(env, reg, argno, off, size, mem_size, zero_size_allowed);
4551 	if (err)
4552 		return err;
4553 
4554 	if (IS_ERR_OR_NULL(map->record))
4555 		return 0;
4556 	rec = map->record;
4557 	for (i = 0; i < rec->cnt; i++) {
4558 		struct btf_field *field = &rec->fields[i];
4559 		u32 p = field->offset;
4560 
4561 		/* If any part of a field  can be touched by load/store, reject
4562 		 * this program. To check that [x1, x2) overlaps with [y1, y2),
4563 		 * it is sufficient to check x1 < y2 && y1 < x2.
4564 		 */
4565 		if (reg_smin(reg) + off < p + field->size &&
4566 		    p < reg_umax(reg) + off + size) {
4567 			switch (field->type) {
4568 			case BPF_KPTR_UNREF:
4569 			case BPF_KPTR_REF:
4570 			case BPF_KPTR_PERCPU:
4571 			case BPF_UPTR:
4572 				if (src != ACCESS_DIRECT) {
4573 					verbose(env, "%s cannot be accessed indirectly by helper\n",
4574 						btf_field_type_name(field->type));
4575 					return -EACCES;
4576 				}
4577 				if (!tnum_is_const(reg->var_off)) {
4578 					verbose(env, "%s access cannot have variable offset\n",
4579 						btf_field_type_name(field->type));
4580 					return -EACCES;
4581 				}
4582 				if (p != off + reg->var_off.value) {
4583 					verbose(env, "%s access misaligned expected=%u off=%llu\n",
4584 						btf_field_type_name(field->type),
4585 						p, off + reg->var_off.value);
4586 					return -EACCES;
4587 				}
4588 				if (size != bpf_size_to_bytes(BPF_DW)) {
4589 					verbose(env, "%s access size must be BPF_DW\n",
4590 						btf_field_type_name(field->type));
4591 					return -EACCES;
4592 				}
4593 				break;
4594 			default:
4595 				verbose(env, "%s cannot be accessed directly by load/store\n",
4596 					btf_field_type_name(field->type));
4597 				return -EACCES;
4598 			}
4599 		}
4600 	}
4601 	return 0;
4602 }
4603 
/* Decide whether the program may access packet data directly with access
 * type 't', based on the resolved program type.
 *
 * 'meta' is non-NULL when the access is checked on behalf of a call argument;
 * for the program types listed under the first two groups the answer is then
 * taken from meta->pkt_access. When 'meta' is NULL, an allowed access in
 * those groups also sets env->seen_direct_write.
 *
 * Returns true if the access is allowed, false otherwise (including for all
 * program types not listed below).
 */
static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
			       const struct bpf_call_arg_meta *meta,
			       enum bpf_access_type t)
{
	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);

	switch (prog_type) {
	/* Program types only with direct read access go here! */
	case BPF_PROG_TYPE_LWT_IN:
	case BPF_PROG_TYPE_LWT_OUT:
	case BPF_PROG_TYPE_LWT_SEG6LOCAL:
	case BPF_PROG_TYPE_SK_REUSEPORT:
	case BPF_PROG_TYPE_FLOW_DISSECTOR:
	case BPF_PROG_TYPE_CGROUP_SKB:
		if (t == BPF_WRITE)
			return false;
		/* Reads are handled like the read + write group below. */
		fallthrough;

	/* Program types with direct read + write access go here! */
	case BPF_PROG_TYPE_SCHED_CLS:
	case BPF_PROG_TYPE_SCHED_ACT:
	case BPF_PROG_TYPE_XDP:
	case BPF_PROG_TYPE_LWT_XMIT:
	case BPF_PROG_TYPE_SK_SKB:
	case BPF_PROG_TYPE_SK_MSG:
		if (meta)
			return meta->pkt_access;

		/* Note: set for any access that reaches here, reads included. */
		env->seen_direct_write = true;
		return true;

	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
		/* Always allowed; only writes are recorded. */
		if (t == BPF_WRITE)
			env->seen_direct_write = true;

		return true;

	default:
		return false;
	}
}
4645 
4646 static int check_packet_access(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno, int off,
4647 			       int size, bool zero_size_allowed)
4648 {
4649 	int err;
4650 
4651 	if (reg->range < 0) {
4652 		verbose(env, "%s offset is outside of the packet\n", reg_arg_name(env, argno));
4653 		return -EINVAL;
4654 	}
4655 
4656 	err = check_mem_region_access(env, reg, argno, off, size, reg->range, zero_size_allowed);
4657 	if (err)
4658 		return err;
4659 
4660 	/* __check_mem_access has made sure "off + size - 1" is within u16.
4661 	 * reg_umax(reg) can't be bigger than MAX_PACKET_OFF which is 0xffff,
4662 	 * otherwise find_good_pkt_pointers would have refused to set range info
4663 	 * that __check_mem_access would have rejected this pkt access.
4664 	 * Therefore, "off + reg_umax(reg) + size - 1" won't overflow u32.
4665 	 */
4666 	env->prog->aux->max_pkt_offset =
4667 		max_t(u32, env->prog->aux->max_pkt_offset,
4668 		      off + reg_umax(reg) + size - 1);
4669 
4670 	return 0;
4671 }
4672 
4673 static bool is_var_ctx_off_allowed(struct bpf_prog *prog)
4674 {
4675 	return resolve_prog_type(prog) == BPF_PROG_TYPE_SYSCALL;
4676 }
4677 
4678 /* check access to 'struct bpf_context' fields.  Supports fixed offsets only */
4679 static int __check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
4680 			      enum bpf_access_type t, struct bpf_insn_access_aux *info)
4681 {
4682 	if (env->ops->is_valid_access &&
4683 	    env->ops->is_valid_access(off, size, t, env->prog, info)) {
4684 		/* A non zero info.ctx_field_size indicates that this field is a
4685 		 * candidate for later verifier transformation to load the whole
4686 		 * field and then apply a mask when accessed with a narrower
4687 		 * access than actual ctx access size. A zero info.ctx_field_size
4688 		 * will only allow for whole field access and rejects any other
4689 		 * type of narrower access.
4690 		 */
4691 		if (base_type(info->reg_type) == PTR_TO_BTF_ID) {
4692 			if (info->ref_id &&
4693 			    !find_reference_state(env->cur_state, info->ref_id)) {
4694 				verbose(env, "invalid bpf_context access off=%d. Reference may already be released\n",
4695 					off);
4696 				return -EACCES;
4697 			}
4698 		} else {
4699 			env->insn_aux_data[insn_idx].ctx_field_size = info->ctx_field_size;
4700 		}
4701 		/* remember the offset of last byte accessed in ctx */
4702 		if (env->prog->aux->max_ctx_offset < off + size)
4703 			env->prog->aux->max_ctx_offset = off + size;
4704 		return 0;
4705 	}
4706 
4707 	verbose(env, "invalid bpf_context access off=%d size=%d\n", off, size);
4708 	return -EACCES;
4709 }
4710 
4711 static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, struct bpf_reg_state *reg, argno_t argno,
4712 			    int off, int access_size, enum bpf_access_type t,
4713 			    struct bpf_insn_access_aux *info)
4714 {
4715 	/*
4716 	 * Program types that don't rewrite ctx accesses can safely
4717 	 * dereference ctx pointers with fixed offsets.
4718 	 */
4719 	bool var_off_ok = is_var_ctx_off_allowed(env->prog);
4720 	bool fixed_off_ok = !env->ops->convert_ctx_access;
4721 	int err;
4722 
4723 	if (var_off_ok)
4724 		err = check_mem_region_access(env, reg, argno, off, access_size, U16_MAX, false);
4725 	else
4726 		err = __check_ptr_off_reg(env, reg, argno, fixed_off_ok);
4727 	if (err)
4728 		return err;
4729 	off += reg_umax(reg);
4730 
4731 	err = __check_ctx_access(env, insn_idx, off, access_size, t, info);
4732 	if (err)
4733 		verbose_linfo(env, insn_idx, "; ");
4734 	return err;
4735 }
4736 
4737 static int check_flow_keys_access(struct bpf_verifier_env *env,
4738 				  struct bpf_reg_state *reg, argno_t argno,
4739 				  int off, int size)
4740 {
4741 	/* Only a constant offset is allowed here; fold it into off. */
4742 	if (!tnum_is_const(reg->var_off)) {
4743 		char tn_buf[48];
4744 
4745 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4746 		verbose(env, "%s invalid variable offset to flow keys: off=%d, var_off=%s\n",
4747 			reg_arg_name(env, argno), off, tn_buf);
4748 		return -EACCES;
4749 	}
4750 	off += reg->var_off.value;
4751 
4752 	if (size < 0 || off < 0 ||
4753 	    (u64)off + size > sizeof(struct bpf_flow_keys)) {
4754 		verbose(env, "invalid access to flow keys off=%d size=%d\n",
4755 			off, size);
4756 		return -EACCES;
4757 	}
4758 	return 0;
4759 }
4760 
4761 static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
4762 			     struct bpf_reg_state *reg, argno_t argno, int off, int size,
4763 			     enum bpf_access_type t)
4764 {
4765 	struct bpf_insn_access_aux info = {};
4766 	bool valid;
4767 
4768 	if (reg_smin(reg) < 0) {
4769 		verbose(env, "%s min value is negative, either use unsigned index or do a if (index >=0) check.\n",
4770 			reg_arg_name(env, argno));
4771 		return -EACCES;
4772 	}
4773 
4774 	switch (reg->type) {
4775 	case PTR_TO_SOCK_COMMON:
4776 		valid = bpf_sock_common_is_valid_access(off, size, t, &info);
4777 		break;
4778 	case PTR_TO_SOCKET:
4779 		valid = bpf_sock_is_valid_access(off, size, t, &info);
4780 		break;
4781 	case PTR_TO_TCP_SOCK:
4782 		valid = bpf_tcp_sock_is_valid_access(off, size, t, &info);
4783 		break;
4784 	case PTR_TO_XDP_SOCK:
4785 		valid = bpf_xdp_sock_is_valid_access(off, size, t, &info);
4786 		break;
4787 	default:
4788 		valid = false;
4789 	}
4790 
4791 
4792 	if (valid) {
4793 		env->insn_aux_data[insn_idx].ctx_field_size =
4794 			info.ctx_field_size;
4795 		return 0;
4796 	}
4797 
4798 	verbose(env, "%s invalid %s access off=%d size=%d\n",
4799 		reg_arg_name(env, argno), reg_type_str(env, reg->type), off, size);
4800 
4801 	return -EACCES;
4802 }
4803 
4804 static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
4805 {
4806 	return __is_pointer_value(env->allow_ptr_leaks, reg_state(env, regno));
4807 }
4808 
4809 static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
4810 {
4811 	const struct bpf_reg_state *reg = reg_state(env, regno);
4812 
4813 	return reg->type == PTR_TO_CTX;
4814 }
4815 
4816 static bool is_sk_reg(struct bpf_verifier_env *env, int regno)
4817 {
4818 	const struct bpf_reg_state *reg = reg_state(env, regno);
4819 
4820 	return type_is_sk_pointer(reg->type);
4821 }
4822 
4823 static bool is_pkt_reg(struct bpf_verifier_env *env, int regno)
4824 {
4825 	const struct bpf_reg_state *reg = reg_state(env, regno);
4826 
4827 	return type_is_pkt_pointer(reg->type);
4828 }
4829 
4830 static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno)
4831 {
4832 	const struct bpf_reg_state *reg = reg_state(env, regno);
4833 
4834 	/* Separate to is_ctx_reg() since we still want to allow BPF_ST here. */
4835 	return reg->type == PTR_TO_FLOW_KEYS;
4836 }
4837 
4838 static bool is_arena_reg(struct bpf_verifier_env *env, int regno)
4839 {
4840 	const struct bpf_reg_state *reg = reg_state(env, regno);
4841 
4842 	return reg->type == PTR_TO_ARENA;
4843 }
4844 
4845 /* Return false if @regno contains a pointer whose type isn't supported for
4846  * atomic instruction @insn.
4847  */
4848 static bool atomic_ptr_type_ok(struct bpf_verifier_env *env, int regno,
4849 			       struct bpf_insn *insn)
4850 {
4851 	if (is_ctx_reg(env, regno))
4852 		return false;
4853 	if (is_pkt_reg(env, regno))
4854 		return false;
4855 	if (is_flow_key_reg(env, regno))
4856 		return false;
4857 	if (is_sk_reg(env, regno))
4858 		return false;
4859 	if (is_arena_reg(env, regno))
4860 		return bpf_jit_supports_insn(insn, true);
4861 
4862 	return true;
4863 }
4864 
/* Table indexed by register base type giving a pointer to the BTF id that
 * corresponds to that register type. Entries not listed here are NULL.
 * The socket entries are only populated when CONFIG_NET is enabled.
 */
static u32 *reg2btf_ids[__BPF_REG_TYPE_MAX] = {
#ifdef CONFIG_NET
	[PTR_TO_SOCKET] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK],
	[PTR_TO_SOCK_COMMON] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
	[PTR_TO_TCP_SOCK] = &btf_sock_ids[BTF_SOCK_TYPE_TCP],
#endif
	[CONST_PTR_TO_MAP] = btf_bpf_map_id,
};
4873 
4874 static bool is_trusted_reg(struct bpf_verifier_env *env, const struct bpf_reg_state *reg)
4875 {
4876 	/* A referenced register is always trusted. */
4877 	if (reg_is_referenced(env, reg))
4878 		return true;
4879 
4880 	/* Types listed in the reg2btf_ids are always trusted */
4881 	if (reg2btf_ids[base_type(reg->type)] &&
4882 	    !bpf_type_has_unsafe_modifiers(reg->type))
4883 		return true;
4884 
4885 	/* If a register is not referenced, it is trusted if it has the
4886 	 * MEM_ALLOC or PTR_TRUSTED type modifiers, and no others. Some of the
4887 	 * other type modifiers may be safe, but we elect to take an opt-in
4888 	 * approach here as some (e.g. PTR_UNTRUSTED and PTR_MAYBE_NULL) are
4889 	 * not.
4890 	 *
4891 	 * Eventually, we should make PTR_TRUSTED the single source of truth
4892 	 * for whether a register is trusted.
4893 	 */
4894 	return type_flag(reg->type) & BPF_REG_TRUSTED_MODIFIERS &&
4895 	       !bpf_type_has_unsafe_modifiers(reg->type);
4896 }
4897 
4898 static bool is_rcu_reg(const struct bpf_reg_state *reg)
4899 {
4900 	return reg->type & MEM_RCU;
4901 }
4902 
4903 static void clear_trusted_flags(enum bpf_type_flag *flag)
4904 {
4905 	*flag &= ~(BPF_REG_TRUSTED_MODIFIERS | MEM_RCU);
4906 }
4907 
4908 static int check_pkt_ptr_alignment(struct bpf_verifier_env *env,
4909 				   const struct bpf_reg_state *reg,
4910 				   int off, int size, bool strict)
4911 {
4912 	struct tnum reg_off;
4913 	int ip_align;
4914 
4915 	/* Byte size accesses are always allowed. */
4916 	if (!strict || size == 1)
4917 		return 0;
4918 
4919 	/* For platforms that do not have a Kconfig enabling
4920 	 * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the value of
4921 	 * NET_IP_ALIGN is universally set to '2'.  And on platforms
4922 	 * that do set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, we get
4923 	 * to this code only in strict mode where we want to emulate
4924 	 * the NET_IP_ALIGN==2 checking.  Therefore use an
4925 	 * unconditional IP align value of '2'.
4926 	 */
4927 	ip_align = 2;
4928 
4929 	reg_off = tnum_add(reg->var_off, tnum_const(ip_align + off));
4930 	if (!tnum_is_aligned(reg_off, size)) {
4931 		char tn_buf[48];
4932 
4933 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4934 		verbose(env,
4935 			"misaligned packet access off %d+%s+%d size %d\n",
4936 			ip_align, tn_buf, off, size);
4937 		return -EACCES;
4938 	}
4939 
4940 	return 0;
4941 }
4942 
4943 static int check_generic_ptr_alignment(struct bpf_verifier_env *env,
4944 				       const struct bpf_reg_state *reg,
4945 				       const char *pointer_desc,
4946 				       int off, int size, bool strict)
4947 {
4948 	struct tnum reg_off;
4949 
4950 	/* Byte size accesses are always allowed. */
4951 	if (!strict || size == 1)
4952 		return 0;
4953 
4954 	reg_off = tnum_add(reg->var_off, tnum_const(off));
4955 	if (!tnum_is_aligned(reg_off, size)) {
4956 		char tn_buf[48];
4957 
4958 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4959 		verbose(env, "misaligned %saccess off %s+%d size %d\n",
4960 			pointer_desc, tn_buf, off, size);
4961 		return -EACCES;
4962 	}
4963 
4964 	return 0;
4965 }
4966 
4967 static int check_ptr_alignment(struct bpf_verifier_env *env,
4968 			       const struct bpf_reg_state *reg, int off,
4969 			       int size, bool strict_alignment_once)
4970 {
4971 	bool strict = env->strict_alignment || strict_alignment_once;
4972 	const char *pointer_desc = "";
4973 
4974 	switch (reg->type) {
4975 	case PTR_TO_PACKET:
4976 	case PTR_TO_PACKET_META:
4977 		/* Special case, because of NET_IP_ALIGN. Given metadata sits
4978 		 * right in front, treat it the very same way.
4979 		 */
4980 		return check_pkt_ptr_alignment(env, reg, off, size, strict);
4981 	case PTR_TO_FLOW_KEYS:
4982 		pointer_desc = "flow keys ";
4983 		break;
4984 	case PTR_TO_MAP_KEY:
4985 		pointer_desc = "key ";
4986 		break;
4987 	case PTR_TO_MAP_VALUE:
4988 		pointer_desc = "value ";
4989 		if (reg->map_ptr->map_type == BPF_MAP_TYPE_INSN_ARRAY)
4990 			strict = true;
4991 		break;
4992 	case PTR_TO_CTX:
4993 		pointer_desc = "context ";
4994 		break;
4995 	case PTR_TO_STACK:
4996 		pointer_desc = "stack ";
4997 		/* The stack spill tracking logic in check_stack_write_fixed_off()
4998 		 * and check_stack_read_fixed_off() relies on stack accesses being
4999 		 * aligned.
5000 		 */
5001 		strict = true;
5002 		break;
5003 	case PTR_TO_SOCKET:
5004 		pointer_desc = "sock ";
5005 		break;
5006 	case PTR_TO_SOCK_COMMON:
5007 		pointer_desc = "sock_common ";
5008 		break;
5009 	case PTR_TO_TCP_SOCK:
5010 		pointer_desc = "tcp_sock ";
5011 		break;
5012 	case PTR_TO_XDP_SOCK:
5013 		pointer_desc = "xdp_sock ";
5014 		break;
5015 	case PTR_TO_ARENA:
5016 		return 0;
5017 	default:
5018 		break;
5019 	}
5020 	return check_generic_ptr_alignment(env, reg, pointer_desc, off, size,
5021 					   strict);
5022 }
5023 
5024 static enum priv_stack_mode bpf_enable_priv_stack(struct bpf_prog *prog)
5025 {
5026 	if (!bpf_jit_supports_private_stack())
5027 		return NO_PRIV_STACK;
5028 
5029 	/* bpf_prog_check_recur() checks all prog types that use bpf trampoline
5030 	 * while kprobe/tp/perf_event/raw_tp don't use trampoline hence checked
5031 	 * explicitly.
5032 	 */
5033 	switch (prog->type) {
5034 	case BPF_PROG_TYPE_KPROBE:
5035 	case BPF_PROG_TYPE_TRACEPOINT:
5036 	case BPF_PROG_TYPE_PERF_EVENT:
5037 	case BPF_PROG_TYPE_RAW_TRACEPOINT:
5038 		return PRIV_STACK_ADAPTIVE;
5039 	case BPF_PROG_TYPE_TRACING:
5040 	case BPF_PROG_TYPE_LSM:
5041 	case BPF_PROG_TYPE_STRUCT_OPS:
5042 		if (prog->aux->priv_stack_requested || bpf_prog_check_recur(prog))
5043 			return PRIV_STACK_ADAPTIVE;
5044 		fallthrough;
5045 	default:
5046 		break;
5047 	}
5048 
5049 	return NO_PRIV_STACK;
5050 }
5051 
5052 static int round_up_stack_depth(struct bpf_verifier_env *env, int stack_depth)
5053 {
5054 	if (env->prog->jit_requested)
5055 		return round_up(stack_depth, 16);
5056 
5057 	/* round up to 32-bytes, since this is granularity
5058 	 * of interpreter stack size
5059 	 */
5060 	return round_up(max_t(u32, stack_depth, 1), 32);
5061 }
5062 
/* temporary state used for call frame depth calculation
 *
 * One entry per subprog; check_max_stack_depth_subprog() uses the entries
 * as an explicit call stack while it walks the call graph iteratively.
 */
struct bpf_subprog_call_depth_info {
	int ret_insn; /* caller instruction where we return to. */
	int caller; /* caller subprogram idx, -1 when there is no caller */
	int frame; /* # of consecutive static call stack frames on top of stack */
};
5069 
/* starting from main bpf function walk all instructions of the function
 * and recursively walk all callees that given function can call.
 * Ignore jump and exit insns.
 *
 * The walk is iterative: instead of recursing, the caller of each subprog
 * and the instruction to resume at are saved in @dinfo and restored when
 * the end of the callee is reached.
 *
 * @env: verifier environment
 * @idx: index of the subprog the walk starts from
 * @dinfo: scratch array with one entry per subprog
 * @priv_stack_supported: when false, every visited subprog is marked
 *	NO_PRIV_STACK
 *
 * Returns 0 on success; -EACCES when a stack size limit is exceeded,
 * -E2BIG when the call chain has too many frames, -EINVAL for disallowed
 * bpf_throw / exception callback / tail call usage and -EFAULT on an
 * internal verifier inconsistency.
 */
static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx,
					 struct bpf_subprog_call_depth_info *dinfo,
					 bool priv_stack_supported)
{
	struct bpf_subprog_info *subprog = env->subprog_info;
	struct bpf_insn *insn = env->prog->insnsi;
	int depth = 0, frame = 0, i, subprog_end, subprog_depth;
	bool tail_call_reachable = false;
	int total;
	int tmp;

	/* no caller idx */
	dinfo[idx].caller = -1;

	i = subprog[idx].start;
	if (!priv_stack_supported)
		subprog[idx].priv_stack_mode = NO_PRIV_STACK;
process_func:
	/* protect against potential stack overflow that might happen when
	 * bpf2bpf calls get combined with tailcalls. Limit the caller's stack
	 * depth for such case down to 256 so that the worst case scenario
	 * would result in 8k stack size (32 which is tailcall limit * 256 =
	 * 8k).
	 *
	 * To get the idea what might happen, see an example:
	 * func1 -> sub rsp, 128
	 *  subfunc1 -> sub rsp, 256
	 *  tailcall1 -> add rsp, 256
	 *   func2 -> sub rsp, 192 (total stack size = 128 + 192 = 320)
	 *   subfunc2 -> sub rsp, 64
	 *   subfunc22 -> sub rsp, 128
	 *   tailcall2 -> add rsp, 128
	 *    func3 -> sub rsp, 32 (total stack size 128 + 192 + 64 + 32 = 416)
	 *
	 * tailcall will unwind the current stack frame but it will not get rid
	 * of caller's stack as shown on the example above.
	 */
	if (idx && subprog[idx].has_tail_call && depth >= 256) {
		verbose(env,
			"tail_calls are not allowed when call stack of previous frames is %d bytes. Too large\n",
			depth);
		return -EACCES;
	}

	subprog_depth = round_up_stack_depth(env, subprog[idx].stack_depth);
	if (IS_ENABLED(CONFIG_X86_64) && subprog[idx].stack_arg_cnt) {
		/* x86-64 uses R9 for both private stack frame pointer and arg6. */
		subprog[idx].priv_stack_mode = NO_PRIV_STACK;
	} else if (priv_stack_supported) {
		/* Request private stack support only if the subprog stack
		 * depth is no less than BPF_PRIV_STACK_MIN_SIZE. This is to
		 * avoid jit penalty if the stack usage is small.
		 */
		if (subprog[idx].priv_stack_mode == PRIV_STACK_UNKNOWN &&
		    subprog_depth >= BPF_PRIV_STACK_MIN_SIZE)
			subprog[idx].priv_stack_mode = PRIV_STACK_ADAPTIVE;
	}

	if (subprog[idx].priv_stack_mode == PRIV_STACK_ADAPTIVE) {
		/* a private-stack subprog is limited on its own and does not
		 * add to the combined depth of the call chain
		 */
		if (subprog_depth > env->max_stack_depth)
			env->max_stack_depth = subprog_depth;
		if (subprog_depth > MAX_BPF_STACK) {
			verbose(env, "stack size of subprog %d is %d. Too large\n",
				idx, subprog_depth);
			return -EACCES;
		}
	} else {
		depth += subprog_depth;
		if (depth > env->max_stack_depth)
			env->max_stack_depth = depth;
		if (depth > MAX_BPF_STACK) {
			/* count the subprogs on the current call chain */
			total = 0;
			for (tmp = idx; tmp >= 0; tmp = dinfo[tmp].caller)
				total++;

			verbose(env, "combined stack size of %d calls is %d. Too large\n",
				total, depth);
			return -EACCES;
		}
	}
continue_func:
	subprog_end = subprog[idx + 1].start;
	for (; i < subprog_end; i++) {
		int next_insn, sidx;

		if (bpf_pseudo_kfunc_call(insn + i) && !insn[i].off) {
			bool err = false;

			if (!bpf_is_throw_kfunc(insn + i))
				continue;
			/* reject the throw if any subprog on the current call
			 * chain is a callback
			 */
			for (tmp = idx; tmp >= 0 && !err; tmp = dinfo[tmp].caller) {
				if (subprog[tmp].is_cb) {
					err = true;
					break;
				}
			}
			if (!err)
				continue;
			verbose(env,
				"bpf_throw kfunc (insn %d) cannot be called from callback subprog %d\n",
				i, idx);
			return -EINVAL;
		}

		if (!bpf_pseudo_call(insn + i) && !bpf_pseudo_func(insn + i))
			continue;
		/* remember insn and function to return to */

		/* find the callee */
		next_insn = i + insn[i].imm + 1;
		sidx = bpf_find_subprog(env, next_insn);
		if (verifier_bug_if(sidx < 0, env, "callee not found at insn %d", next_insn))
			return -EFAULT;
		if (subprog[sidx].is_async_cb) {
			if (subprog[sidx].has_tail_call) {
				verifier_bug(env, "subprog has tail_call and async cb");
				return -EFAULT;
			}
			/* async callbacks don't increase bpf prog stack size unless called directly */
			if (!bpf_pseudo_call(insn + i))
				continue;
			if (subprog[sidx].is_exception_cb) {
				verbose(env, "insn %d cannot call exception cb directly", i);
				return -EINVAL;
			}
		}

		/* store caller info for after we return from callee */
		dinfo[idx].frame = frame;
		dinfo[idx].ret_insn = i + 1;

		/* push caller idx into callee's dinfo */
		dinfo[sidx].caller = idx;

		i = next_insn;

		idx = sidx;
		if (!priv_stack_supported)
			subprog[idx].priv_stack_mode = NO_PRIV_STACK;

		if (subprog[idx].has_tail_call)
			tail_call_reachable = true;

		/* entering a global subprog restarts the frame count */
		frame = bpf_subprog_is_global(env, idx) ? 0 : frame + 1;
		if (frame >= MAX_CALL_FRAMES) {
			verbose(env, "the call stack of %d frames is too deep !\n",
				frame);
			return -E2BIG;
		}
		goto process_func;
	}
	/* if tail call got detected across bpf2bpf calls then mark each of the
	 * currently present subprog frames as tail call reachable subprogs;
	 * this info will be utilized by JIT so that we will be preserving the
	 * tail call counter throughout bpf2bpf calls combined with tailcalls
	 */
	if (tail_call_reachable) {
		for (tmp = idx; tmp >= 0; tmp = dinfo[tmp].caller) {
			if (subprog[tmp].is_exception_cb) {
				verbose(env, "cannot tail call within exception cb\n");
				return -EINVAL;
			}
			if (subprog[tmp].stack_arg_cnt) {
				verbose(env, "tail_calls are not allowed in programs with stack args\n");
				return -EINVAL;
			}
			subprog[tmp].tail_call_reachable = true;
		}
	} else if (!idx && subprog[0].has_tail_call && subprog[0].stack_arg_cnt) {
		verbose(env, "tail_calls are not allowed in programs with stack args\n");
		return -EINVAL;
	}

	if (subprog[0].tail_call_reachable)
		env->prog->aux->tail_call_reachable = true;

	/* end of for() loop means the last insn of the 'subprog'
	 * was reached. Doesn't matter whether it was JA or EXIT
	 */
	if (frame == 0 && dinfo[idx].caller < 0)
		return 0;
	/* undo this subprog's contribution to the combined depth */
	if (subprog[idx].priv_stack_mode != PRIV_STACK_ADAPTIVE)
		depth -= round_up_stack_depth(env, subprog[idx].stack_depth);

	/* pop caller idx from callee */
	idx = dinfo[idx].caller;

	/* retrieve caller state from its frame */
	frame = dinfo[idx].frame;
	i = dinfo[idx].ret_insn;

	/* reset tail_call_reachable to the parent's actual state */
	tail_call_reachable = subprog[idx].tail_call_reachable;

	goto continue_func;
}
5270 
5271 static int check_max_stack_depth(struct bpf_verifier_env *env)
5272 {
5273 	enum priv_stack_mode priv_stack_mode = PRIV_STACK_UNKNOWN;
5274 	struct bpf_subprog_call_depth_info *dinfo;
5275 	struct bpf_subprog_info *si = env->subprog_info;
5276 	bool priv_stack_supported;
5277 	int ret;
5278 
5279 	dinfo = kvcalloc(env->subprog_cnt, sizeof(*dinfo), GFP_KERNEL_ACCOUNT);
5280 	if (!dinfo)
5281 		return -ENOMEM;
5282 
5283 	for (int i = 0; i < env->subprog_cnt; i++) {
5284 		if (si[i].has_tail_call) {
5285 			priv_stack_mode = NO_PRIV_STACK;
5286 			break;
5287 		}
5288 	}
5289 
5290 	if (priv_stack_mode == PRIV_STACK_UNKNOWN)
5291 		priv_stack_mode = bpf_enable_priv_stack(env->prog);
5292 
5293 	/* All async_cb subprogs use normal kernel stack. If a particular
5294 	 * subprog appears in both main prog and async_cb subtree, that
5295 	 * subprog will use normal kernel stack to avoid potential nesting.
5296 	 * The reverse subprog traversal ensures when main prog subtree is
5297 	 * checked, the subprogs appearing in async_cb subtrees are already
5298 	 * marked as using normal kernel stack, so stack size checking can
5299 	 * be done properly.
5300 	 */
5301 	for (int i = env->subprog_cnt - 1; i >= 0; i--) {
5302 		if (!i || si[i].is_async_cb) {
5303 			priv_stack_supported = !i && priv_stack_mode == PRIV_STACK_ADAPTIVE;
5304 			ret = check_max_stack_depth_subprog(env, i, dinfo,
5305 					priv_stack_supported);
5306 			if (ret < 0) {
5307 				kvfree(dinfo);
5308 				return ret;
5309 			}
5310 		}
5311 	}
5312 
5313 	for (int i = 0; i < env->subprog_cnt; i++) {
5314 		if (si[i].priv_stack_mode == PRIV_STACK_ADAPTIVE) {
5315 			env->prog->aux->jits_use_priv_stack = true;
5316 			break;
5317 		}
5318 	}
5319 
5320 	kvfree(dinfo);
5321 
5322 	return 0;
5323 }
5324 
5325 static int __check_buffer_access(struct bpf_verifier_env *env,
5326 				 const char *buf_info,
5327 				 const struct bpf_reg_state *reg,
5328 				 argno_t argno, int off, int size)
5329 {
5330 	if (off < 0) {
5331 		verbose(env,
5332 			"%s invalid %s buffer access: off=%d, size=%d\n",
5333 			reg_arg_name(env, argno), buf_info, off, size);
5334 		return -EACCES;
5335 	}
5336 	if (!tnum_is_const(reg->var_off)) {
5337 		char tn_buf[48];
5338 
5339 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
5340 		verbose(env,
5341 			"%s invalid variable buffer offset: off=%d, var_off=%s\n",
5342 			reg_arg_name(env, argno), off, tn_buf);
5343 		return -EACCES;
5344 	}
5345 
5346 	return 0;
5347 }
5348 
5349 static int check_tp_buffer_access(struct bpf_verifier_env *env,
5350 				  const struct bpf_reg_state *reg,
5351 				  argno_t argno, int off, int size)
5352 {
5353 	int err;
5354 
5355 	err = __check_buffer_access(env, "tracepoint", reg, argno, off, size);
5356 	if (err)
5357 		return err;
5358 
5359 	env->prog->aux->max_tp_access = max(reg->var_off.value + off + size,
5360 					    env->prog->aux->max_tp_access);
5361 
5362 	return 0;
5363 }
5364 
5365 static int check_buffer_access(struct bpf_verifier_env *env,
5366 			       const struct bpf_reg_state *reg,
5367 			       argno_t argno, int off, int size,
5368 			       bool zero_size_allowed,
5369 			       u32 *max_access)
5370 {
5371 	const char *buf_info = type_is_rdonly_mem(reg->type) ? "rdonly" : "rdwr";
5372 	int err;
5373 
5374 	err = __check_buffer_access(env, buf_info, reg, argno, off, size);
5375 	if (err)
5376 		return err;
5377 
5378 	*max_access = max(reg->var_off.value + off + size, *max_access);
5379 
5380 	return 0;
5381 }
5382 
/* BPF architecture zero extends alu32 ops into 64-bit registers.
 *
 * Restrict var_off to its subregister part and set the 64-bit unsigned
 * range of @reg from its 32-bit unsigned bounds.
 */
static void zext_32_to_64(struct bpf_reg_state *reg)
{
	reg->var_off = tnum_subreg(reg->var_off);
	reg_set_urange64(reg, reg_u32_min(reg), reg_u32_max(reg));
}
5389 
5390 /* truncate register to smaller size (in bytes)
5391  * must be called with size < BPF_REG_SIZE
5392  */
5393 static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
5394 {
5395 	u64 mask;
5396 
5397 	/* clear high bits in bit representation */
5398 	reg->var_off = tnum_cast(reg->var_off, size);
5399 
5400 	/* fix arithmetic bounds */
5401 	mask = ((u64)1 << (size * 8)) - 1;
5402 	if ((reg_umin(reg) & ~mask) == (reg_umax(reg) & ~mask))
5403 		reg_set_urange64(reg, reg_umin(reg) & mask, reg_umax(reg) & mask);
5404 	else
5405 		reg_set_urange64(reg, 0, mask);
5406 
5407 	/* If size is smaller than 32bit register the 32bit register
5408 	 * values are also truncated so we push 64-bit bounds into
5409 	 * 32-bit bounds. Above were truncated < 32-bits already.
5410 	 */
5411 	if (size < 4)
5412 		__mark_reg32_unbounded(reg);
5413 
5414 	reg_bounds_sync(reg);
5415 }
5416 
5417 static void set_sext64_default_val(struct bpf_reg_state *reg, int size)
5418 {
5419 	if (size == 1) {
5420 		reg_set_srange64(reg, S8_MIN, S8_MAX);
5421 		reg_set_srange32(reg, S8_MIN, S8_MAX);
5422 	} else if (size == 2) {
5423 		reg_set_srange64(reg, S16_MIN, S16_MAX);
5424 		reg_set_srange32(reg, S16_MIN, S16_MAX);
5425 	} else {
5426 		/* size == 4 */
5427 		reg_set_srange64(reg, S32_MIN, S32_MAX);
5428 		reg_set_srange32(reg, S32_MIN, S32_MAX);
5429 	}
5430 	reg->var_off = tnum_unknown;
5431 }
5432 
/* Update @reg's bounds for a sign extension of its low @size bytes to
 * 64 bits. The branches below handle @size of 1, 2 or 4.
 *
 * A constant register is sign-extended exactly. Otherwise the signed
 * bounds are kept only when smin and smax agree on all bits above the low
 * @size bytes and the sign-extended bounds have the same sign; in every
 * other case the defaults from set_sext64_default_val() are used.
 */
static void coerce_reg_to_size_sx(struct bpf_reg_state *reg, int size)
{
	s64 init_s64_max, init_s64_min, s64_max, s64_min, u64_cval;
	u64 top_smax_value, top_smin_value;
	u64 num_bits = size * 8;

	if (tnum_is_const(reg->var_off)) {
		u64_cval = reg->var_off.value;
		if (size == 1)
			reg->var_off = tnum_const((s8)u64_cval);
		else if (size == 2)
			reg->var_off = tnum_const((s16)u64_cval);
		else
			/* size == 4 */
			reg->var_off = tnum_const((s32)u64_cval);

		/* both ranges collapse to the single sign-extended value */
		u64_cval = reg->var_off.value;
		reg->r64 = cnum64_from_urange(u64_cval, u64_cval);
		reg->r32 = cnum32_from_urange((u32)u64_cval, (u32)u64_cval);
		return;
	}

	/* bits above the low num_bits of each signed bound */
	top_smax_value = ((u64)reg_smax(reg) >> num_bits) << num_bits;
	top_smin_value = ((u64)reg_smin(reg) >> num_bits) << num_bits;

	if (top_smax_value != top_smin_value)
		goto out;

	/* find the s64_max and s64_min after sign extension */
	if (size == 1) {
		init_s64_max = (s8)reg_smax(reg);
		init_s64_min = (s8)reg_smin(reg);
	} else if (size == 2) {
		init_s64_max = (s16)reg_smax(reg);
		init_s64_min = (s16)reg_smin(reg);
	} else {
		init_s64_max = (s32)reg_smax(reg);
		init_s64_min = (s32)reg_smin(reg);
	}

	s64_max = max(init_s64_max, init_s64_min);
	s64_min = min(init_s64_max, init_s64_min);

	/* both of s64_max/s64_min positive or negative */
	if ((s64_max >= 0) == (s64_min >= 0)) {
		reg_set_srange64(reg, s64_min, s64_max);
		reg_set_srange32(reg, s64_min, s64_max);
		reg->var_off = tnum_range(s64_min, s64_max);
		return;
	}

out:
	set_sext64_default_val(reg, size);
}
5487 
5488 static void set_sext32_default_val(struct bpf_reg_state *reg, int size)
5489 {
5490 	if (size == 1)
5491 		reg_set_srange32(reg, S8_MIN, S8_MAX);
5492 	else
5493 		/* size == 2 */
5494 		reg_set_srange32(reg, S16_MIN, S16_MAX);
5495 	reg->var_off = tnum_subreg(tnum_unknown);
5496 }
5497 
/* Update @reg's 32-bit bounds for a sign extension of its low @size bytes
 * to 32 bits. The branches below handle @size of 1 or 2.
 *
 * A constant register is sign-extended exactly. Otherwise the signed
 * 32-bit bounds are kept only when s32_min and s32_max agree on all bits
 * above the low @size bytes and the sign-extended bounds have the same
 * sign; in every other case the defaults from set_sext32_default_val()
 * are used.
 */
static void coerce_subreg_to_size_sx(struct bpf_reg_state *reg, int size)
{
	s32 init_s32_max, init_s32_min, s32_max, s32_min, u32_val;
	u32 top_smax_value, top_smin_value;
	u32 num_bits = size * 8;

	if (tnum_is_const(reg->var_off)) {
		u32_val = reg->var_off.value;
		if (size == 1)
			reg->var_off = tnum_const((s8)u32_val);
		else
			reg->var_off = tnum_const((s16)u32_val);

		/* the 32-bit signed range collapses to the single value */
		u32_val = reg->var_off.value;
		reg_set_srange32(reg, u32_val, u32_val);
		return;
	}

	/* bits above the low num_bits of each signed 32-bit bound */
	top_smax_value = ((u32)reg_s32_max(reg) >> num_bits) << num_bits;
	top_smin_value = ((u32)reg_s32_min(reg) >> num_bits) << num_bits;

	if (top_smax_value != top_smin_value)
		goto out;

	/* find the s32_max and s32_min after sign extension */
	if (size == 1) {
		init_s32_max = (s8)reg_s32_max(reg);
		init_s32_min = (s8)reg_s32_min(reg);
	} else {
		/* size == 2 */
		init_s32_max = (s16)reg_s32_max(reg);
		init_s32_min = (s16)reg_s32_min(reg);
	}
	s32_max = max(init_s32_max, init_s32_min);
	s32_min = min(init_s32_max, init_s32_min);

	/* both of s32_max/s32_min positive or negative */
	if ((s32_min >= 0) == (s32_max >= 0)) {
		reg_set_srange32(reg, s32_min, s32_max);
		reg->var_off = tnum_subreg(tnum_range(s32_min, s32_max));
		return;
	}

out:
	set_sext32_default_val(reg, size);
}
5543 
5544 bool bpf_map_is_rdonly(const struct bpf_map *map)
5545 {
5546 	/* A map is considered read-only if the following condition are true:
5547 	 *
5548 	 * 1) BPF program side cannot change any of the map content. The
5549 	 *    BPF_F_RDONLY_PROG flag is throughout the lifetime of a map
5550 	 *    and was set at map creation time.
5551 	 * 2) The map value(s) have been initialized from user space by a
5552 	 *    loader and then "frozen", such that no new map update/delete
5553 	 *    operations from syscall side are possible for the rest of
5554 	 *    the map's lifetime from that point onwards.
5555 	 * 3) Any parallel/pending map update/delete operations from syscall
5556 	 *    side have been completed. Only after that point, it's safe to
5557 	 *    assume that map value(s) are immutable.
5558 	 */
5559 	return (map->map_flags & BPF_F_RDONLY_PROG) &&
5560 	       READ_ONCE(map->frozen) &&
5561 	       !bpf_map_write_active(map);
5562 }
5563 
5564 int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val,
5565 			bool is_ldsx)
5566 {
5567 	void *ptr;
5568 	u64 addr;
5569 	int err;
5570 
5571 	err = map->ops->map_direct_value_addr(map, &addr, off);
5572 	if (err)
5573 		return err;
5574 	ptr = (void *)(long)addr + off;
5575 
5576 	switch (size) {
5577 	case sizeof(u8):
5578 		*val = is_ldsx ? (s64)*(s8 *)ptr : (u64)*(u8 *)ptr;
5579 		break;
5580 	case sizeof(u16):
5581 		*val = is_ldsx ? (s64)*(s16 *)ptr : (u64)*(u16 *)ptr;
5582 		break;
5583 	case sizeof(u32):
5584 		*val = is_ldsx ? (s64)*(s32 *)ptr : (u64)*(u32 *)ptr;
5585 		break;
5586 	case sizeof(u64):
5587 		*val = *(u64 *)ptr;
5588 		break;
5589 	default:
5590 		return -EINVAL;
5591 	}
5592 	return 0;
5593 }
5594 
/* Build the name of an allow-list type by pasting a suffix onto @__type.
 * The same suffixes are passed as strings to btf_nested_type_is_trusted()
 * by the type_is_*() helpers in this file.
 */
#define BTF_TYPE_SAFE_RCU(__type)  __PASTE(__type, __safe_rcu)
#define BTF_TYPE_SAFE_RCU_OR_NULL(__type)  __PASTE(__type, __safe_rcu_or_null)
#define BTF_TYPE_SAFE_TRUSTED(__type)  __PASTE(__type, __safe_trusted)
#define BTF_TYPE_SAFE_TRUSTED_OR_NULL(__type)  __PASTE(__type, __safe_trusted_or_null)
5599 
/*
 * Allowlist a few fields as RCU trusted or fully trusted.
 * This logic doesn't allow mixed tagging and will be removed once GCC
 * supports btf_type_tag.
 */
5605 
/* RCU trusted: these fields are trusted in RCU CS and never NULL */
/* Allow-listed fields of struct task_struct. */
BTF_TYPE_SAFE_RCU(struct task_struct) {
	const cpumask_t *cpus_ptr;
	struct css_set __rcu *cgroups;
	struct task_struct __rcu *real_parent;
	struct task_struct *group_leader;
};

/* Allow-listed fields of struct cgroup. */
BTF_TYPE_SAFE_RCU(struct cgroup) {
	/* cgrp->kn is always accessible as documented in kernel/cgroup/cgroup.c */
	struct kernfs_node *kn;
};

/* Allow-listed fields of struct css_set. */
BTF_TYPE_SAFE_RCU(struct css_set) {
	struct cgroup *dfl_cgrp;
};

/* Allow-listed fields of struct cgroup_subsys_state. */
BTF_TYPE_SAFE_RCU(struct cgroup_subsys_state) {
	struct cgroup *cgroup;
};
5626 
/* RCU trusted: these fields are trusted in RCU CS and can be NULL */
/* Allow-listed fields of struct mm_struct; owner only with CONFIG_MEMCG. */
BTF_TYPE_SAFE_RCU_OR_NULL(struct mm_struct) {
	struct file __rcu *exe_file;
#ifdef CONFIG_MEMCG
	struct task_struct __rcu *owner;
#endif
};

/* skb->sk, req->sk are not RCU protected, but we mark them as such
 * because bpf prog accessible sockets are SOCK_RCU_FREE.
 */
BTF_TYPE_SAFE_RCU_OR_NULL(struct sk_buff) {
	struct sock *sk;
};

/* Allow-listed fields of struct request_sock (see the note above on sk). */
BTF_TYPE_SAFE_RCU_OR_NULL(struct request_sock) {
	struct sock *sk;
};
5645 
/* full trusted: these fields are trusted even outside of RCU CS and never NULL */
/* Allow-listed fields of struct bpf_iter_meta. */
BTF_TYPE_SAFE_TRUSTED(struct bpf_iter_meta) {
	struct seq_file *seq;
};

/* Allow-listed fields of struct bpf_iter__task. */
BTF_TYPE_SAFE_TRUSTED(struct bpf_iter__task) {
	struct bpf_iter_meta *meta;
	struct task_struct *task;
};

/* Allow-listed fields of struct linux_binprm. */
BTF_TYPE_SAFE_TRUSTED(struct linux_binprm) {
	struct file *file;
};

/* Allow-listed fields of struct file. */
BTF_TYPE_SAFE_TRUSTED(struct file) {
	struct inode *f_inode;
};
5663 
/* NOTE(review): going by the macro suffix, the fields below are presumably
 * fully trusted but may be NULL -- confirm against the users of
 * type_is_trusted_or_null().
 */
/* Allow-listed fields of struct dentry. */
BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct dentry) {
	struct inode *d_inode;
};

/* Allow-listed fields of struct socket. */
BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct socket) {
	struct sock *sk;
};

/* Allow-listed fields of struct vm_area_struct. */
BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct vm_area_struct) {
	struct mm_struct *vm_mm;
	struct file *vm_file;
};
5676 
/* Check @field_name of the type identified by @btf_id against the
 * "__safe_rcu" allow-list via btf_nested_type_is_trusted().
 * Returns that helper's result.
 */
static bool type_is_rcu(struct bpf_verifier_env *env,
			struct bpf_reg_state *reg,
			const char *field_name, u32 btf_id)
{
	/* reference every __safe_rcu allow-list struct declared in this file */
	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct task_struct));
	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct cgroup));
	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct css_set));
	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct cgroup_subsys_state));

	return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id, "__safe_rcu");
}
5688 
/* Check @field_name of the type identified by @btf_id against the
 * "__safe_rcu_or_null" allow-list via btf_nested_type_is_trusted().
 * Returns that helper's result.
 */
static bool type_is_rcu_or_null(struct bpf_verifier_env *env,
				struct bpf_reg_state *reg,
				const char *field_name, u32 btf_id)
{
	/* reference every __safe_rcu_or_null allow-list struct declared in this file */
	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU_OR_NULL(struct mm_struct));
	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU_OR_NULL(struct sk_buff));
	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU_OR_NULL(struct request_sock));

	return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id, "__safe_rcu_or_null");
}
5699 
5700 static bool type_is_trusted(struct bpf_verifier_env *env,
5701 			    struct bpf_reg_state *reg,
5702 			    const char *field_name, u32 btf_id)
5703 {
5704 	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct bpf_iter_meta));
5705 	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct bpf_iter__task));
5706 	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct linux_binprm));
5707 	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct file));
5708 
5709 	return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id, "__safe_trusted");
5710 }
5711 
5712 static bool type_is_trusted_or_null(struct bpf_verifier_env *env,
5713 				    struct bpf_reg_state *reg,
5714 				    const char *field_name, u32 btf_id)
5715 {
5716 	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct socket));
5717 	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct dentry));
5718 	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct vm_area_struct));
5719 
5720 	return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id,
5721 					  "__safe_trusted_or_null");
5722 }
5723 
/* Validate a 'size'-byte access of kind 'atype' at offset 'off' through 'reg',
 * a BTF-typed pointer, and on a read with value_regno >= 0 describe the loaded
 * value in regs[value_regno].
 *
 * The access is refused when:
 *  - env->allow_ptr_leaks is not set (-EPERM);
 *  - the program is not GPL compatible and reg->btf is kernel BTF (-EINVAL);
 *  - reg has a non-constant var_off, the resulting offset is negative, or the
 *    register is tagged MEM_USER or MEM_PERCPU (-EACCES).
 *
 * Writes to objects that are not program-allocated are delegated to
 * env->ops->btf_struct_access() when the program type provides one. Everything
 * else goes through the default btf_struct_access(), where only reads are
 * accepted unless the register is a pointer to an allocated object.
 *
 * When the walk yields PTR_TO_BTF_ID, the trust flags of the resulting pointer
 * are derived from the trust state of 'reg' and from the type_is_*() field
 * allowlists; see the comments in the body.
 *
 * @argno is only used to name the register/argument in log messages.
 *
 * Returns 0 on success or a negative error code.
 */
static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
				   struct bpf_reg_state *regs, struct bpf_reg_state *reg,
				   argno_t argno, int off, int size,
				   enum bpf_access_type atype,
				   int value_regno)
{
	const struct btf_type *t = btf_type_by_id(reg->btf, reg->btf_id);
	const char *tname = btf_name_by_offset(reg->btf, t->name_off);
	const char *field_name = NULL;
	enum bpf_type_flag flag = 0;
	u32 btf_id = 0;
	int ret;

	if (!env->allow_ptr_leaks) {
		verbose(env,
			"'struct %s' access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n",
			tname);
		return -EPERM;
	}
	if (!env->prog->gpl_compatible && btf_is_kernel(reg->btf)) {
		verbose(env,
			"Cannot access kernel 'struct %s' from non-GPL compatible program\n",
			tname);
		return -EINVAL;
	}

	if (!tnum_is_const(reg->var_off)) {
		char tn_buf[48];

		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
		verbose(env,
			"%s is ptr_%s invalid variable offset: off=%d, var_off=%s\n",
			reg_arg_name(env, argno), tname, off, tn_buf);
		return -EACCES;
	}

	/* var_off is constant here, so fold it into the instruction offset */
	off += reg->var_off.value;

	if (off < 0) {
		verbose(env,
			"%s is ptr_%s invalid negative access: off=%d\n",
			reg_arg_name(env, argno), tname, off);
		return -EACCES;
	}

	if (reg->type & MEM_USER) {
		verbose(env,
			"%s is ptr_%s access user memory: off=%d\n",
			reg_arg_name(env, argno), tname, off);
		return -EACCES;
	}

	if (reg->type & MEM_PERCPU) {
		verbose(env,
			"%s is ptr_%s access percpu memory: off=%d\n",
			reg_arg_name(env, argno), tname, off);
		return -EACCES;
	}

	if (env->ops->btf_struct_access && !type_is_alloc(reg->type) && atype == BPF_WRITE) {
		/* Program-type specific write check; it does not report
		 * btf_id/flag/field_name, so those keep their initial values.
		 */
		if (!btf_is_kernel(reg->btf)) {
			verifier_bug(env, "reg->btf must be kernel btf");
			return -EFAULT;
		}
		ret = env->ops->btf_struct_access(&env->log, reg, off, size);
	} else {
		/* Writes are permitted with default btf_struct_access for
		 * program allocated objects (which always have id > 0),
		 * but not for untrusted PTR_TO_BTF_ID | MEM_ALLOC.
		 */
		if (atype != BPF_READ && !type_is_ptr_alloc_obj(reg->type)) {
			verbose(env, "only read is supported\n");
			return -EACCES;
		}

		if (type_is_alloc(reg->type) && !type_is_non_owning_ref(reg->type) &&
		    !(reg->type & MEM_RCU) && !reg_is_referenced(env, reg)) {
			verifier_bug(env, "allocated object must have a referenced id");
			return -EFAULT;
		}

		ret = btf_struct_access(&env->log, reg, off, size, atype, &btf_id, &flag, &field_name);
	}

	if (ret < 0)
		return ret;

	/* From here on 'ret' is the register type produced by the walk; only a
	 * PTR_TO_BTF_ID result needs its trust flags adjusted.
	 */
	if (ret != PTR_TO_BTF_ID) {
		/* just mark; */

	} else if (type_flag(reg->type) & PTR_UNTRUSTED) {
		/* If this is an untrusted pointer, all pointers formed by walking it
		 * also inherit the untrusted flag.
		 */
		flag = PTR_UNTRUSTED;

	} else if (is_trusted_reg(env, reg) || is_rcu_reg(reg)) {
		/* By default any pointer obtained from walking a trusted pointer is no
		 * longer trusted, unless the field being accessed has explicitly been
		 * marked as inheriting its parent's state of trust (either full or RCU).
		 * For example:
		 * 'cgroups' pointer is untrusted if task->cgroups dereference
		 * happened in a sleepable program outside of bpf_rcu_read_lock()
		 * section. In a non-sleepable program it's trusted while in RCU CS (aka MEM_RCU).
		 * Note bpf_rcu_read_unlock() converts MEM_RCU pointers to PTR_UNTRUSTED.
		 *
		 * A regular RCU-protected pointer with __rcu tag can also be deemed
		 * trusted if we are in an RCU CS. Such pointer can be NULL.
		 */
		if (type_is_trusted(env, reg, field_name, btf_id)) {
			flag |= PTR_TRUSTED;
		} else if (type_is_trusted_or_null(env, reg, field_name, btf_id)) {
			flag |= PTR_TRUSTED | PTR_MAYBE_NULL;
		} else if (in_rcu_cs(env) && !type_may_be_null(reg->type)) {
			if (type_is_rcu(env, reg, field_name, btf_id)) {
				/* ignore __rcu tag and mark it MEM_RCU */
				flag |= MEM_RCU;
			} else if (flag & MEM_RCU ||
				   type_is_rcu_or_null(env, reg, field_name, btf_id)) {
				/* __rcu tagged pointers can be NULL */
				flag |= MEM_RCU | PTR_MAYBE_NULL;

				/* We always trust them */
				if (type_is_rcu_or_null(env, reg, field_name, btf_id) &&
				    flag & PTR_UNTRUSTED)
					flag &= ~PTR_UNTRUSTED;
			} else if (flag & (MEM_PERCPU | MEM_USER)) {
				/* keep as-is */
			} else {
				/* walking unknown pointers yields old deprecated PTR_TO_BTF_ID */
				clear_trusted_flags(&flag);
			}
		} else {
			/*
			 * If not in RCU CS or MEM_RCU pointer can be NULL then
			 * aggressively mark as untrusted otherwise such
			 * pointers will be plain PTR_TO_BTF_ID without flags
			 * and will be allowed to be passed into helpers for
			 * compat reasons.
			 */
			flag = PTR_UNTRUSTED;
		}
	} else {
		/* Old compat. Deprecated */
		clear_trusted_flags(&flag);
	}

	/* Only loads into a real destination register update register state */
	if (atype == BPF_READ && value_regno >= 0) {
		ret = mark_btf_ld_reg(env, regs, value_regno, ret, reg->btf, btf_id, flag);
		if (ret < 0)
			return ret;
	}

	return 0;
}
5879 
5880 static int check_ptr_to_map_access(struct bpf_verifier_env *env,
5881 				   struct bpf_reg_state *regs, struct bpf_reg_state *reg,
5882 				   argno_t argno, int off, int size,
5883 				   enum bpf_access_type atype,
5884 				   int value_regno)
5885 {
5886 	struct bpf_map *map = reg->map_ptr;
5887 	struct bpf_reg_state map_reg;
5888 	enum bpf_type_flag flag = 0;
5889 	const struct btf_type *t;
5890 	const char *tname;
5891 	u32 btf_id;
5892 	int ret;
5893 
5894 	if (!btf_vmlinux) {
5895 		verbose(env, "map_ptr access not supported without CONFIG_DEBUG_INFO_BTF\n");
5896 		return -ENOTSUPP;
5897 	}
5898 
5899 	if (!map->ops->map_btf_id || !*map->ops->map_btf_id) {
5900 		verbose(env, "map_ptr access not supported for map type %d\n",
5901 			map->map_type);
5902 		return -ENOTSUPP;
5903 	}
5904 
5905 	t = btf_type_by_id(btf_vmlinux, *map->ops->map_btf_id);
5906 	tname = btf_name_by_offset(btf_vmlinux, t->name_off);
5907 
5908 	if (!env->allow_ptr_leaks) {
5909 		verbose(env,
5910 			"'struct %s' access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n",
5911 			tname);
5912 		return -EPERM;
5913 	}
5914 
5915 	if (off < 0) {
5916 		verbose(env, "%s is %s invalid negative access: off=%d\n",
5917 			reg_arg_name(env, argno), tname, off);
5918 		return -EACCES;
5919 	}
5920 
5921 	if (atype != BPF_READ) {
5922 		verbose(env, "only read from %s is supported\n", tname);
5923 		return -EACCES;
5924 	}
5925 
5926 	/* Simulate access to a PTR_TO_BTF_ID */
5927 	memset(&map_reg, 0, sizeof(map_reg));
5928 	ret = mark_btf_ld_reg(env, &map_reg, 0, PTR_TO_BTF_ID,
5929 			      btf_vmlinux, *map->ops->map_btf_id, 0);
5930 	if (ret < 0)
5931 		return ret;
5932 	ret = btf_struct_access(&env->log, &map_reg, off, size, atype, &btf_id, &flag, NULL);
5933 	if (ret < 0)
5934 		return ret;
5935 
5936 	if (value_regno >= 0) {
5937 		ret = mark_btf_ld_reg(env, regs, value_regno, ret, btf_vmlinux, btf_id, flag);
5938 		if (ret < 0)
5939 			return ret;
5940 	}
5941 
5942 	return 0;
5943 }
5944 
5945 /* Check that the stack access at the given offset is within bounds. The
5946  * maximum valid offset is -1.
5947  *
5948  * The minimum valid offset is -MAX_BPF_STACK for writes, and
5949  * -state->allocated_stack for reads.
5950  */
5951 static int check_stack_slot_within_bounds(struct bpf_verifier_env *env,
5952                                           s64 off,
5953                                           struct bpf_func_state *state,
5954                                           enum bpf_access_type t)
5955 {
5956 	int min_valid_off;
5957 
5958 	if (t == BPF_WRITE || env->allow_uninit_stack)
5959 		min_valid_off = -MAX_BPF_STACK;
5960 	else
5961 		min_valid_off = -state->allocated_stack;
5962 
5963 	if (off < min_valid_off || off > -1)
5964 		return -EACCES;
5965 	return 0;
5966 }
5967 
5968 /* Check that the stack access at 'regno + off' falls within the maximum stack
5969  * bounds.
5970  *
5971  * 'off' includes `regno->offset`, but not its dynamic part (if any).
5972  */
5973 static int check_stack_access_within_bounds(
5974 		struct bpf_verifier_env *env, struct bpf_reg_state *reg,
5975 		argno_t argno, int off, int access_size,
5976 		enum bpf_access_type type)
5977 {
5978 	struct bpf_func_state *state = bpf_func(env, reg);
5979 	s64 min_off, max_off;
5980 	int err;
5981 	char *err_extra;
5982 
5983 	if (type == BPF_READ)
5984 		err_extra = " read from";
5985 	else
5986 		err_extra = " write to";
5987 
5988 	if (tnum_is_const(reg->var_off)) {
5989 		min_off = (s64)reg->var_off.value + off;
5990 		max_off = min_off + access_size;
5991 	} else {
5992 		if (reg_smax(reg) >= BPF_MAX_VAR_OFF ||
5993 		    reg_smin(reg) <= -BPF_MAX_VAR_OFF) {
5994 			verbose(env, "invalid unbounded variable-offset%s stack %s\n",
5995 				err_extra, reg_arg_name(env, argno));
5996 			return -EACCES;
5997 		}
5998 		min_off = reg_smin(reg) + off;
5999 		max_off = reg_smax(reg) + off + access_size;
6000 	}
6001 
6002 	err = check_stack_slot_within_bounds(env, min_off, state, type);
6003 	if (!err && max_off > 0)
6004 		err = -EINVAL; /* out of stack access into non-negative offsets */
6005 	if (!err && access_size < 0)
6006 		/* access_size should not be negative (or overflow an int); others checks
6007 		 * along the way should have prevented such an access.
6008 		 */
6009 		err = -EFAULT; /* invalid negative access size; integer overflow? */
6010 
6011 	if (err) {
6012 		if (tnum_is_const(reg->var_off)) {
6013 			verbose(env, "invalid%s stack %s off=%lld size=%d\n",
6014 				err_extra, reg_arg_name(env, argno), min_off, access_size);
6015 		} else {
6016 			char tn_buf[48];
6017 
6018 			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
6019 			verbose(env, "invalid variable-offset%s stack %s var_off=%s off=%d size=%d\n",
6020 				err_extra, reg_arg_name(env, argno), tn_buf, off, access_size);
6021 		}
6022 		return err;
6023 	}
6024 
6025 	/* Note that there is no stack access with offset zero, so the needed stack
6026 	 * size is -min_off, not -min_off+1.
6027 	 */
6028 	return grow_stack_state(env, state, -min_off /* size */);
6029 }
6030 
6031 static bool get_func_retval_range(struct bpf_prog *prog,
6032 				  struct bpf_retval_range *range)
6033 {
6034 	if (prog->type == BPF_PROG_TYPE_LSM &&
6035 		prog->expected_attach_type == BPF_LSM_MAC &&
6036 		!bpf_lsm_get_retval_range(prog, range)) {
6037 		return true;
6038 	}
6039 	return false;
6040 }
6041 
6042 static void add_scalar_to_reg(struct bpf_reg_state *dst_reg, s64 val)
6043 {
6044 	struct bpf_reg_state fake_reg;
6045 
6046 	if (!val)
6047 		return;
6048 
6049 	fake_reg.type = SCALAR_VALUE;
6050 	__mark_reg_known(&fake_reg, val);
6051 
6052 	scalar32_min_max_add(dst_reg, &fake_reg);
6053 	scalar_min_max_add(dst_reg, &fake_reg);
6054 	dst_reg->var_off = tnum_add(dst_reg->var_off, fake_reg.var_off);
6055 
6056 	reg_bounds_sync(dst_reg);
6057 }
6058 
/* check whether memory at (reg + off) is accessible for t = (read | write)
 * if t==write, value_regno is a register which value is stored into memory
 * if t==read, value_regno is a register which will receive the value from memory
 * if t==write && value_regno==-1, some unknown value is stored into memory
 * if t==read && value_regno==-1, don't care what we read from memory
 *
 * bpf_size is the BPF size encoding of the access and is converted to bytes
 * with bpf_size_to_bytes(). After the alignment check the access is dispatched
 * on reg->type; each pointer kind applies its own permission and bounds checks
 * and, for loads, decides what regs[value_regno] becomes. Pointer kinds not
 * handled by any branch are rejected with -EACCES.
 *
 * A scalar loaded with a size smaller than BPF_REG_SIZE is finally truncated
 * to that size: zero-extended by default, sign-extended when is_ldsx is set.
 *
 * @argno is only used to name the register/argument in log messages.
 *
 * Returns 0 on success, otherwise the error of the first failing check.
 */
static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, struct bpf_reg_state *reg, argno_t argno,
			    int off, int bpf_size, enum bpf_access_type t,
			    int value_regno, bool strict_alignment_once, bool is_ldsx)
{
	struct bpf_reg_state *regs = cur_regs(env);
	int size, err = 0;

	size = bpf_size_to_bytes(bpf_size);
	if (size < 0)
		return size;

	err = check_ptr_alignment(env, reg, off, size, strict_alignment_once);
	if (err)
		return err;

	if (reg->type == PTR_TO_MAP_KEY) {
		/* Map keys are read-only; a load yields an unknown scalar */
		if (t == BPF_WRITE) {
			verbose(env, "write to change key %s not allowed\n",
				reg_arg_name(env, argno));
			return -EACCES;
		}

		err = check_mem_region_access(env, reg, argno, off, size,
					      reg->map_ptr->key_size, false);
		if (err)
			return err;
		if (value_regno >= 0)
			mark_reg_unknown(env, regs, value_regno);
	} else if (reg->type == PTR_TO_MAP_VALUE) {
		struct btf_field *kptr_field = NULL;

		if (t == BPF_WRITE && value_regno >= 0 &&
		    is_pointer_value(env, value_regno)) {
			verbose(env, "R%d leaks addr into map\n", value_regno);
			return -EACCES;
		}
		err = check_map_access_type(env, reg, off, size, t);
		if (err)
			return err;
		err = check_map_access(env, reg, argno, off, size, false, ACCESS_DIRECT);
		if (err)
			return err;
		/* A kptr/uptr field can only be identified at a constant offset */
		if (tnum_is_const(reg->var_off))
			kptr_field = btf_record_find(reg->map_ptr->record,
						     off + reg->var_off.value, BPF_KPTR | BPF_UPTR);
		if (kptr_field) {
			err = check_map_kptr_access(env, value_regno, insn_idx, kptr_field);
		} else if (t == BPF_READ && value_regno >= 0) {
			struct bpf_map *map = reg->map_ptr;

			/*
			 * If map is read-only, track its contents as scalars,
			 * unless it is an insn array (see the special case below)
			 */
			if (tnum_is_const(reg->var_off) &&
			    bpf_map_is_rdonly(map) &&
			    map->ops->map_direct_value_addr &&
			    map->map_type != BPF_MAP_TYPE_INSN_ARRAY) {
				int map_off = off + reg->var_off.value;
				u64 val = 0;

				err = bpf_map_direct_read(map, map_off, size,
							  &val, is_ldsx);
				if (err)
					return err;

				regs[value_regno].type = SCALAR_VALUE;
				__mark_reg_known(&regs[value_regno], val);
			} else if (map->map_type == BPF_MAP_TYPE_INSN_ARRAY) {
				if (bpf_size != BPF_DW) {
					verbose(env, "Invalid read of %d bytes from insn_array\n",
						     size);
					return -EACCES;
				}
				/* The loaded value becomes a PTR_TO_INSN that
				 * starts as a copy of 'reg' with 'off' added.
				 */
				regs[value_regno] = *reg;
				add_scalar_to_reg(&regs[value_regno], off);
				regs[value_regno].type = PTR_TO_INSN;
			} else {
				mark_reg_unknown(env, regs, value_regno);
			}
		}
	} else if (base_type(reg->type) == PTR_TO_MEM) {
		bool rdonly_mem = type_is_rdonly_mem(reg->type);
		bool rdonly_untrusted = rdonly_mem && (reg->type & PTR_UNTRUSTED);

		if (type_may_be_null(reg->type)) {
			verbose(env, "%s invalid mem access '%s'\n", reg_arg_name(env, argno),
				reg_type_str(env, reg->type));
			return -EACCES;
		}

		if (t == BPF_WRITE && rdonly_mem) {
			verbose(env, "%s cannot write into %s\n",
				reg_arg_name(env, argno), reg_type_str(env, reg->type));
			return -EACCES;
		}

		if (t == BPF_WRITE && value_regno >= 0 &&
		    is_pointer_value(env, value_regno)) {
			verbose(env, "R%d leaks addr into mem\n", value_regno);
			return -EACCES;
		}

		/*
		 * Accesses to untrusted PTR_TO_MEM are done through probe
		 * instructions, hence no need to check bounds in that case.
		 */
		if (!rdonly_untrusted)
			err = check_mem_region_access(env, reg, argno, off, size,
						      reg->mem_size, false);
		if (!err && value_regno >= 0 && (t == BPF_READ || rdonly_mem))
			mark_reg_unknown(env, regs, value_regno);
	} else if (reg->type == PTR_TO_CTX) {
		/* check_ctx_access() reports through 'info' what the loaded ctx
		 * field is; SCALAR_VALUE is the default.
		 */
		struct bpf_insn_access_aux info = {
			.reg_type = SCALAR_VALUE,
			.is_ldsx = is_ldsx,
			.log = &env->log,
		};
		struct bpf_retval_range range;

		if (t == BPF_WRITE && value_regno >= 0 &&
		    is_pointer_value(env, value_regno)) {
			verbose(env, "R%d leaks addr into ctx\n", value_regno);
			return -EACCES;
		}

		err = check_ctx_access(env, insn_idx, reg, argno, off, size, t, &info);
		if (!err && t == BPF_READ && value_regno >= 0) {
			/* ctx access returns either a scalar, or a
			 * PTR_TO_PACKET[_META,_END]. In the latter
			 * case, we know the offset is zero.
			 */
			if (info.reg_type == SCALAR_VALUE) {
				if (info.is_retval && get_func_retval_range(env->prog, &range)) {
					err = __mark_reg_s32_range(env, regs, value_regno,
								   range.minval, range.maxval);
					if (err)
						return err;
				} else {
					mark_reg_unknown(env, regs, value_regno);
				}
			} else {
				mark_reg_known_zero(env, regs,
						    value_regno);
				/* A load of ctx field could have different
				 * actual load size with the one encoded in the
				 * insn. When the dst is PTR, it is for sure not
				 * a sub-register.
				 */
				regs[value_regno].subreg_def = DEF_NOT_SUBREG;
				if (base_type(info.reg_type) == PTR_TO_BTF_ID) {
					regs[value_regno].btf = info.btf;
					regs[value_regno].btf_id = info.btf_id;
					regs[value_regno].id = info.ref_id;
				}
				/* A maybe-NULL pointer without an id gets a fresh one */
				if (type_may_be_null(info.reg_type) && !regs[value_regno].id)
					regs[value_regno].id = ++env->id_gen;
			}
			regs[value_regno].type = info.reg_type;
		}

	} else if (reg->type == PTR_TO_STACK) {
		/* Basic bounds checks. */
		err = check_stack_access_within_bounds(env, reg, argno, off, size, t);
		if (err)
			return err;

		if (t == BPF_READ)
			err = check_stack_read(env, reg, argno, off, size,
					       value_regno);
		else
			err = check_stack_write(env, reg, off, size,
						value_regno, insn_idx);
	} else if (reg_is_pkt_pointer(reg)) {
		if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) {
			verbose(env, "cannot write into packet\n");
			return -EACCES;
		}
		if (t == BPF_WRITE && value_regno >= 0 &&
		    is_pointer_value(env, value_regno)) {
			verbose(env, "R%d leaks addr into packet\n",
				value_regno);
			return -EACCES;
		}
		err = check_packet_access(env, reg, argno, off, size, false);
		if (!err && t == BPF_READ && value_regno >= 0)
			mark_reg_unknown(env, regs, value_regno);
	} else if (reg->type == PTR_TO_FLOW_KEYS) {
		if (t == BPF_WRITE && value_regno >= 0 &&
		    is_pointer_value(env, value_regno)) {
			verbose(env, "R%d leaks addr into flow keys\n",
				value_regno);
			return -EACCES;
		}

		err = check_flow_keys_access(env, reg, argno, off, size);
		if (!err && t == BPF_READ && value_regno >= 0)
			mark_reg_unknown(env, regs, value_regno);
	} else if (type_is_sk_pointer(reg->type)) {
		if (t == BPF_WRITE) {
			verbose(env, "%s cannot write into %s\n",
				reg_arg_name(env, argno), reg_type_str(env, reg->type));
			return -EACCES;
		}
		err = check_sock_access(env, insn_idx, reg, argno, off, size, t);
		if (!err && value_regno >= 0)
			mark_reg_unknown(env, regs, value_regno);
	} else if (reg->type == PTR_TO_TP_BUFFER) {
		err = check_tp_buffer_access(env, reg, argno, off, size);
		if (!err && t == BPF_READ && value_regno >= 0)
			mark_reg_unknown(env, regs, value_regno);
	} else if (base_type(reg->type) == PTR_TO_BTF_ID &&
		   !type_may_be_null(reg->type)) {
		err = check_ptr_to_btf_access(env, regs, reg, argno, off, size, t,
					      value_regno);
	} else if (reg->type == CONST_PTR_TO_MAP) {
		err = check_ptr_to_map_access(env, regs, reg, argno, off, size, t,
					      value_regno);
	} else if (base_type(reg->type) == PTR_TO_BUF &&
		   !type_may_be_null(reg->type)) {
		bool rdonly_mem = type_is_rdonly_mem(reg->type);
		u32 *max_access;

		/* Read-only and read-write buffers use separate max-access
		 * fields in prog->aux.
		 */
		if (rdonly_mem) {
			if (t == BPF_WRITE) {
				verbose(env, "%s cannot write into %s\n",
					reg_arg_name(env, argno), reg_type_str(env, reg->type));
				return -EACCES;
			}
			max_access = &env->prog->aux->max_rdonly_access;
		} else {
			max_access = &env->prog->aux->max_rdwr_access;
		}

		err = check_buffer_access(env, reg, argno, off, size, false,
					  max_access);

		if (!err && value_regno >= 0 && (rdonly_mem || t == BPF_READ))
			mark_reg_unknown(env, regs, value_regno);
	} else if (reg->type == PTR_TO_ARENA) {
		/* No bounds check is done here for arena pointers */
		if (t == BPF_READ && value_regno >= 0)
			mark_reg_unknown(env, regs, value_regno);
	} else {
		verbose(env, "%s invalid mem access '%s'\n", reg_arg_name(env, argno),
			reg_type_str(env, reg->type));
		return -EACCES;
	}

	if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ &&
	    regs[value_regno].type == SCALAR_VALUE) {
		if (!is_ldsx)
			/* b/h/w load zero-extends, mark upper bits as known 0 */
			coerce_reg_to_size(&regs[value_regno], size);
		else
			coerce_reg_to_size_sx(&regs[value_regno], size);
	}
	return err;
}
6323 
/* Forward declaration: the load/store/atomic checks below call
 * save_aux_ptr_type() before its definition appears in this file.
 */
static int save_aux_ptr_type(struct bpf_verifier_env *env, enum bpf_reg_type type,
			     bool allow_trust_mismatch);
6326 
6327 static int check_load_mem(struct bpf_verifier_env *env, struct bpf_insn *insn,
6328 			  bool strict_alignment_once, bool is_ldsx,
6329 			  bool allow_trust_mismatch, const char *ctx)
6330 {
6331 	struct bpf_verifier_state *vstate = env->cur_state;
6332 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
6333 	struct bpf_reg_state *regs = cur_regs(env);
6334 	enum bpf_reg_type src_reg_type;
6335 	int err;
6336 
6337 	/* Handle stack arg read */
6338 	if (is_stack_arg_ldx(insn)) {
6339 		err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
6340 		if (err)
6341 			return err;
6342 		return check_stack_arg_read(env, state, insn->off, insn->dst_reg);
6343 	}
6344 
6345 	/* check src operand */
6346 	err = check_reg_arg(env, insn->src_reg, SRC_OP);
6347 	if (err)
6348 		return err;
6349 
6350 	/* check dst operand */
6351 	err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
6352 	if (err)
6353 		return err;
6354 
6355 	src_reg_type = regs[insn->src_reg].type;
6356 
6357 	/* Check if (src_reg + off) is readable. The state of dst_reg will be
6358 	 * updated by this call.
6359 	 */
6360 	err = check_mem_access(env, env->insn_idx, regs + insn->src_reg, argno_from_reg(insn->src_reg), insn->off,
6361 			       BPF_SIZE(insn->code), BPF_READ, insn->dst_reg,
6362 			       strict_alignment_once, is_ldsx);
6363 	err = err ?: save_aux_ptr_type(env, src_reg_type,
6364 				       allow_trust_mismatch);
6365 	err = err ?: reg_bounds_sanity_check(env, &regs[insn->dst_reg], ctx);
6366 
6367 	return err;
6368 }
6369 
6370 static int check_store_reg(struct bpf_verifier_env *env, struct bpf_insn *insn,
6371 			   bool strict_alignment_once)
6372 {
6373 	struct bpf_verifier_state *vstate = env->cur_state;
6374 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
6375 	struct bpf_reg_state *regs = cur_regs(env);
6376 	enum bpf_reg_type dst_reg_type;
6377 	int err;
6378 
6379 	/* Handle stack arg write */
6380 	if (is_stack_arg_stx(insn)) {
6381 		err = check_reg_arg(env, insn->src_reg, SRC_OP);
6382 		if (err)
6383 			return err;
6384 		return check_stack_arg_write(env, state, insn->off, regs + insn->src_reg);
6385 	}
6386 
6387 	/* check src1 operand */
6388 	err = check_reg_arg(env, insn->src_reg, SRC_OP);
6389 	if (err)
6390 		return err;
6391 
6392 	/* check src2 operand */
6393 	err = check_reg_arg(env, insn->dst_reg, SRC_OP);
6394 	if (err)
6395 		return err;
6396 
6397 	dst_reg_type = regs[insn->dst_reg].type;
6398 
6399 	/* Check if (dst_reg + off) is writeable. */
6400 	err = check_mem_access(env, env->insn_idx, regs + insn->dst_reg, argno_from_reg(insn->dst_reg), insn->off,
6401 			       BPF_SIZE(insn->code), BPF_WRITE, insn->src_reg,
6402 			       strict_alignment_once, false);
6403 	err = err ?: save_aux_ptr_type(env, dst_reg_type, false);
6404 
6405 	return err;
6406 }
6407 
6408 static int check_atomic_rmw(struct bpf_verifier_env *env,
6409 			    struct bpf_insn *insn)
6410 {
6411 	struct bpf_reg_state *dst_reg;
6412 	int load_reg;
6413 	int err;
6414 
6415 	if (BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) {
6416 		verbose(env, "invalid atomic operand size\n");
6417 		return -EINVAL;
6418 	}
6419 
6420 	/* check src1 operand */
6421 	err = check_reg_arg(env, insn->src_reg, SRC_OP);
6422 	if (err)
6423 		return err;
6424 
6425 	/* check src2 operand */
6426 	err = check_reg_arg(env, insn->dst_reg, SRC_OP);
6427 	if (err)
6428 		return err;
6429 
6430 	if (insn->imm == BPF_CMPXCHG) {
6431 		/* Check comparison of R0 with memory location */
6432 		const u32 aux_reg = BPF_REG_0;
6433 
6434 		err = check_reg_arg(env, aux_reg, SRC_OP);
6435 		if (err)
6436 			return err;
6437 
6438 		if (is_pointer_value(env, aux_reg)) {
6439 			verbose(env, "R%d leaks addr into mem\n", aux_reg);
6440 			return -EACCES;
6441 		}
6442 	}
6443 
6444 	if (is_pointer_value(env, insn->src_reg)) {
6445 		verbose(env, "R%d leaks addr into mem\n", insn->src_reg);
6446 		return -EACCES;
6447 	}
6448 
6449 	if (!atomic_ptr_type_ok(env, insn->dst_reg, insn)) {
6450 		verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n",
6451 			insn->dst_reg,
6452 			reg_type_str(env, reg_state(env, insn->dst_reg)->type));
6453 		return -EACCES;
6454 	}
6455 
6456 	if (insn->imm & BPF_FETCH) {
6457 		if (insn->imm == BPF_CMPXCHG)
6458 			load_reg = BPF_REG_0;
6459 		else
6460 			load_reg = insn->src_reg;
6461 
6462 		/* check and record load of old value */
6463 		err = check_reg_arg(env, load_reg, DST_OP);
6464 		if (err)
6465 			return err;
6466 	} else {
6467 		/* This instruction accesses a memory location but doesn't
6468 		 * actually load it into a register.
6469 		 */
6470 		load_reg = -1;
6471 	}
6472 
6473 	dst_reg = cur_regs(env) + insn->dst_reg;
6474 
6475 	/* Check whether we can read the memory, with second call for fetch
6476 	 * case to simulate the register fill.
6477 	 */
6478 	err = check_mem_access(env, env->insn_idx, dst_reg, argno_from_reg(insn->dst_reg), insn->off,
6479 			       BPF_SIZE(insn->code), BPF_READ, -1, true, false);
6480 	if (!err && load_reg >= 0)
6481 		err = check_mem_access(env, env->insn_idx, dst_reg, argno_from_reg(insn->dst_reg),
6482 				       insn->off, BPF_SIZE(insn->code),
6483 				       BPF_READ, load_reg, true, false);
6484 	if (err)
6485 		return err;
6486 
6487 	if (is_arena_reg(env, insn->dst_reg)) {
6488 		err = save_aux_ptr_type(env, PTR_TO_ARENA, false);
6489 		if (err)
6490 			return err;
6491 	}
6492 	/* Check whether we can write into the same memory. */
6493 	err = check_mem_access(env, env->insn_idx, dst_reg, argno_from_reg(insn->dst_reg), insn->off,
6494 			       BPF_SIZE(insn->code), BPF_WRITE, -1, true, false);
6495 	if (err)
6496 		return err;
6497 	return 0;
6498 }
6499 
6500 static int check_atomic_load(struct bpf_verifier_env *env,
6501 			     struct bpf_insn *insn)
6502 {
6503 	int err;
6504 
6505 	err = check_load_mem(env, insn, true, false, false, "atomic_load");
6506 	if (err)
6507 		return err;
6508 
6509 	if (!atomic_ptr_type_ok(env, insn->src_reg, insn)) {
6510 		verbose(env, "BPF_ATOMIC loads from R%d %s is not allowed\n",
6511 			insn->src_reg,
6512 			reg_type_str(env, reg_state(env, insn->src_reg)->type));
6513 		return -EACCES;
6514 	}
6515 
6516 	return 0;
6517 }
6518 
6519 static int check_atomic_store(struct bpf_verifier_env *env,
6520 			      struct bpf_insn *insn)
6521 {
6522 	int err;
6523 
6524 	err = check_store_reg(env, insn, true);
6525 	if (err)
6526 		return err;
6527 
6528 	if (!atomic_ptr_type_ok(env, insn->dst_reg, insn)) {
6529 		verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n",
6530 			insn->dst_reg,
6531 			reg_type_str(env, reg_state(env, insn->dst_reg)->type));
6532 		return -EACCES;
6533 	}
6534 
6535 	return 0;
6536 }
6537 
6538 static int check_atomic(struct bpf_verifier_env *env, struct bpf_insn *insn)
6539 {
6540 	switch (insn->imm) {
6541 	case BPF_ADD:
6542 	case BPF_ADD | BPF_FETCH:
6543 	case BPF_AND:
6544 	case BPF_AND | BPF_FETCH:
6545 	case BPF_OR:
6546 	case BPF_OR | BPF_FETCH:
6547 	case BPF_XOR:
6548 	case BPF_XOR | BPF_FETCH:
6549 	case BPF_XCHG:
6550 	case BPF_CMPXCHG:
6551 		return check_atomic_rmw(env, insn);
6552 	case BPF_LOAD_ACQ:
6553 		if (BPF_SIZE(insn->code) == BPF_DW && BITS_PER_LONG != 64) {
6554 			verbose(env,
6555 				"64-bit load-acquires are only supported on 64-bit arches\n");
6556 			return -EOPNOTSUPP;
6557 		}
6558 		return check_atomic_load(env, insn);
6559 	case BPF_STORE_REL:
6560 		if (BPF_SIZE(insn->code) == BPF_DW && BITS_PER_LONG != 64) {
6561 			verbose(env,
6562 				"64-bit store-releases are only supported on 64-bit arches\n");
6563 			return -EOPNOTSUPP;
6564 		}
6565 		return check_atomic_store(env, insn);
6566 	default:
6567 		verbose(env, "BPF_ATOMIC uses invalid atomic opcode %02x\n",
6568 			insn->imm);
6569 		return -EINVAL;
6570 	}
6571 }
6572 
6573 /* When register 'regno' is used to read the stack (either directly or through
6574  * a helper function) make sure that it's within stack boundary and, depending
6575  * on the access type and privileges, that all elements of the stack are
6576  * initialized.
6577  *
6578  * All registers that have been spilled on the stack in the slots within the
6579  * read offsets are marked as read.
6580  */
6581 static int check_stack_range_initialized(
6582 		struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno, int off,
6583 		int access_size, bool zero_size_allowed,
6584 		enum bpf_access_type type, struct bpf_call_arg_meta *meta)
6585 {
6586 	struct bpf_func_state *state = bpf_func(env, reg);
6587 	int err, min_off, max_off, i, j, slot, spi;
6588 	/* Some accesses can write anything into the stack, others are
6589 	 * read-only.
6590 	 */
6591 	bool clobber = type == BPF_WRITE;
6592 	/*
6593 	 * Negative access_size signals global subprog/kfunc arg check where
6594 	 * STACK_POISON slots are acceptable. static stack liveness
6595 	 * might have determined that subprog doesn't read them,
6596 	 * but BTF based global subprog validation isn't accurate enough.
6597 	 */
6598 	bool allow_poison = access_size < 0 || clobber;
6599 
6600 	access_size = abs(access_size);
6601 
6602 	if (access_size == 0 && !zero_size_allowed) {
6603 		verbose(env, "invalid zero-sized read\n");
6604 		return -EACCES;
6605 	}
6606 
6607 	err = check_stack_access_within_bounds(env, reg, argno, off, access_size, type);
6608 	if (err)
6609 		return err;
6610 
6611 
6612 	if (tnum_is_const(reg->var_off)) {
6613 		min_off = max_off = reg->var_off.value + off;
6614 	} else {
6615 		/* Variable offset is prohibited for unprivileged mode for
6616 		 * simplicity since it requires corresponding support in
6617 		 * Spectre masking for stack ALU.
6618 		 * See also retrieve_ptr_limit().
6619 		 */
6620 		if (!env->bypass_spec_v1) {
6621 			char tn_buf[48];
6622 
6623 			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
6624 			verbose(env, "%s variable offset stack access prohibited for !root, var_off=%s\n",
6625 				reg_arg_name(env, argno), tn_buf);
6626 			return -EACCES;
6627 		}
6628 		/* Only initialized buffer on stack is allowed to be accessed
6629 		 * with variable offset. With uninitialized buffer it's hard to
6630 		 * guarantee that whole memory is marked as initialized on
6631 		 * helper return since specific bounds are unknown what may
6632 		 * cause uninitialized stack leaking.
6633 		 */
6634 		if (meta && meta->raw_mode)
6635 			meta = NULL;
6636 
6637 		min_off = reg_smin(reg) + off;
6638 		max_off = reg_smax(reg) + off;
6639 	}
6640 
6641 	if (meta && meta->raw_mode) {
6642 		/* Ensure we won't be overwriting dynptrs when simulating byte
6643 		 * by byte access in check_helper_call using meta.access_size.
6644 		 * This would be a problem if we have a helper in the future
6645 		 * which takes:
6646 		 *
6647 		 *	helper(uninit_mem, len, dynptr)
6648 		 *
6649 		 * Now, uninint_mem may overlap with dynptr pointer. Hence, it
6650 		 * may end up writing to dynptr itself when touching memory from
6651 		 * arg 1. This can be relaxed on a case by case basis for known
6652 		 * safe cases, but reject due to the possibilitiy of aliasing by
6653 		 * default.
6654 		 */
6655 		for (i = min_off; i < max_off + access_size; i++) {
6656 			int stack_off = -i - 1;
6657 
6658 			spi = bpf_get_spi(i);
6659 			/* raw_mode may write past allocated_stack */
6660 			if (state->allocated_stack <= stack_off)
6661 				continue;
6662 			if (state->stack[spi].slot_type[stack_off % BPF_REG_SIZE] == STACK_DYNPTR) {
6663 				verbose(env, "potential write to dynptr at off=%d disallowed\n", i);
6664 				return -EACCES;
6665 			}
6666 		}
6667 		meta->access_size = access_size;
6668 		meta->regno = reg_from_argno(argno);
6669 		return 0;
6670 	}
6671 
6672 	for (i = min_off; i < max_off + access_size; i++) {
6673 		u8 *stype;
6674 
6675 		slot = -i - 1;
6676 		spi = slot / BPF_REG_SIZE;
6677 		if (state->allocated_stack <= slot) {
6678 			verbose(env, "allocated_stack too small\n");
6679 			return -EFAULT;
6680 		}
6681 
6682 		stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
6683 		if (*stype == STACK_MISC)
6684 			goto mark;
6685 		if ((*stype == STACK_ZERO) ||
6686 		    (*stype == STACK_INVALID && env->allow_uninit_stack)) {
6687 			if (clobber) {
6688 				/* helper can write anything into the stack */
6689 				*stype = STACK_MISC;
6690 			}
6691 			goto mark;
6692 		}
6693 
6694 		if (bpf_is_spilled_reg(&state->stack[spi]) &&
6695 		    (state->stack[spi].spilled_ptr.type == SCALAR_VALUE ||
6696 		     env->allow_ptr_leaks)) {
6697 			if (clobber) {
6698 				__mark_reg_unknown(env, &state->stack[spi].spilled_ptr);
6699 				for (j = 0; j < BPF_REG_SIZE; j++)
6700 					scrub_spilled_slot(&state->stack[spi].slot_type[j]);
6701 			}
6702 			goto mark;
6703 		}
6704 
6705 		if (*stype == STACK_POISON) {
6706 			if (allow_poison)
6707 				goto mark;
6708 			verbose(env, "reading from stack %s off %d+%d size %d, slot poisoned by dead code elimination\n",
6709 				reg_arg_name(env, argno), min_off, i - min_off, access_size);
6710 		} else if (tnum_is_const(reg->var_off)) {
6711 			verbose(env, "invalid read from stack %s off %d+%d size %d\n",
6712 				reg_arg_name(env, argno), min_off, i - min_off, access_size);
6713 		} else {
6714 			char tn_buf[48];
6715 
6716 			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
6717 			verbose(env, "invalid read from stack %s var_off %s+%d size %d\n",
6718 				reg_arg_name(env, argno), tn_buf, i - min_off, access_size);
6719 		}
6720 		return -EACCES;
6721 mark:
6722 		;
6723 	}
6724 	return 0;
6725 }
6726 
6727 static int check_helper_mem_access(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno,
6728 				   int access_size, enum bpf_access_type access_type,
6729 				   bool zero_size_allowed,
6730 				   struct bpf_call_arg_meta *meta)
6731 {
6732 	struct bpf_reg_state *regs = cur_regs(env);
6733 	u32 *max_access;
6734 
6735 	switch (base_type(reg->type)) {
6736 	case PTR_TO_PACKET:
6737 	case PTR_TO_PACKET_META:
6738 		return check_packet_access(env, reg, argno, 0, access_size,
6739 					   zero_size_allowed);
6740 	case PTR_TO_MAP_KEY:
6741 		if (access_type == BPF_WRITE) {
6742 			verbose(env, "%s cannot write into %s\n",
6743 				reg_arg_name(env, argno), reg_type_str(env, reg->type));
6744 			return -EACCES;
6745 		}
6746 		return check_mem_region_access(env, reg, argno, 0, access_size,
6747 					       reg->map_ptr->key_size, false);
6748 	case PTR_TO_MAP_VALUE:
6749 		if (check_map_access_type(env, reg, 0, access_size, access_type))
6750 			return -EACCES;
6751 		return check_map_access(env, reg, argno, 0, access_size,
6752 					zero_size_allowed, ACCESS_HELPER);
6753 	case PTR_TO_MEM:
6754 		if (type_is_rdonly_mem(reg->type)) {
6755 			if (access_type == BPF_WRITE) {
6756 				verbose(env, "%s cannot write into %s\n",
6757 					reg_arg_name(env, argno), reg_type_str(env, reg->type));
6758 				return -EACCES;
6759 			}
6760 		}
6761 		return check_mem_region_access(env, reg, argno, 0,
6762 					       access_size, reg->mem_size,
6763 					       zero_size_allowed);
6764 	case PTR_TO_BUF:
6765 		if (type_is_rdonly_mem(reg->type)) {
6766 			if (access_type == BPF_WRITE) {
6767 				verbose(env, "%s cannot write into %s\n",
6768 					reg_arg_name(env, argno), reg_type_str(env, reg->type));
6769 				return -EACCES;
6770 			}
6771 
6772 			max_access = &env->prog->aux->max_rdonly_access;
6773 		} else {
6774 			max_access = &env->prog->aux->max_rdwr_access;
6775 		}
6776 		return check_buffer_access(env, reg, argno, 0,
6777 					   access_size, zero_size_allowed,
6778 					   max_access);
6779 	case PTR_TO_STACK:
6780 		return check_stack_range_initialized(
6781 				env, reg,
6782 				argno, 0, access_size,
6783 				zero_size_allowed, access_type, meta);
6784 	case PTR_TO_BTF_ID:
6785 		return check_ptr_to_btf_access(env, regs, reg, argno, 0,
6786 					       access_size, access_type, -1);
6787 	case PTR_TO_CTX:
6788 		/* Only permit reading or writing syscall context using helper calls. */
6789 		if (is_var_ctx_off_allowed(env->prog)) {
6790 			int err = check_mem_region_access(env, reg, argno, 0, access_size, U16_MAX,
6791 							  zero_size_allowed);
6792 			if (err)
6793 				return err;
6794 			if (env->prog->aux->max_ctx_offset < reg_umax(reg) + access_size)
6795 				env->prog->aux->max_ctx_offset = reg_umax(reg) + access_size;
6796 			return 0;
6797 		}
6798 		fallthrough;
6799 	default: /* scalar_value or invalid ptr */
6800 		/* Allow zero-byte read from NULL, regardless of pointer type */
6801 		if (zero_size_allowed && access_size == 0 &&
6802 		    bpf_register_is_null(reg))
6803 			return 0;
6804 
6805 		verbose(env, "%s type=%s ", reg_arg_name(env, argno),
6806 			reg_type_str(env, reg->type));
6807 		verbose(env, "expected=%s\n", reg_type_str(env, PTR_TO_STACK));
6808 		return -EACCES;
6809 	}
6810 }
6811 
6812 /* verify arguments to helpers or kfuncs consisting of a pointer and an access
6813  * size.
6814  *
6815  * @mem_reg contains the pointer, @size_reg contains the access size.
6816  */
6817 static int check_mem_size_reg(struct bpf_verifier_env *env,
6818 			      struct bpf_reg_state *mem_reg,
6819 			      struct bpf_reg_state *size_reg, argno_t mem_argno,
6820 			      argno_t size_argno, enum bpf_access_type access_type,
6821 			      bool zero_size_allowed,
6822 			      struct bpf_call_arg_meta *meta)
6823 {
6824 	int err;
6825 
6826 	/* This is used to refine r0 return value bounds for helpers
6827 	 * that enforce this value as an upper bound on return values.
6828 	 * See do_refine_retval_range() for helpers that can refine
6829 	 * the return value. C type of helper is u32 so we pull register
6830 	 * bound from umax_value however, if negative verifier errors
6831 	 * out. Only upper bounds can be learned because retval is an
6832 	 * int type and negative retvals are allowed.
6833 	 */
6834 	meta->msize_max_value = reg_umax(size_reg);
6835 
6836 	/* The register is SCALAR_VALUE; the access check happens using
6837 	 * its boundaries. For unprivileged variable accesses, disable
6838 	 * raw mode so that the program is required to initialize all
6839 	 * the memory that the helper could just partially fill up.
6840 	 */
6841 	if (!tnum_is_const(size_reg->var_off))
6842 		meta = NULL;
6843 
6844 	if (reg_smin(size_reg) < 0) {
6845 		verbose(env, "%s min value is negative, either use unsigned or 'var &= const'\n",
6846 			reg_arg_name(env, size_argno));
6847 		return -EACCES;
6848 	}
6849 
6850 	if (reg_umin(size_reg) == 0 && !zero_size_allowed) {
6851 		verbose(env, "%s invalid zero-sized read: u64=[%lld,%lld]\n",
6852 			reg_arg_name(env, size_argno), reg_umin(size_reg), reg_umax(size_reg));
6853 		return -EACCES;
6854 	}
6855 
6856 	if (reg_umax(size_reg) >= BPF_MAX_VAR_SIZ) {
6857 		verbose(env, "%s unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
6858 			reg_arg_name(env, size_argno));
6859 		return -EACCES;
6860 	}
6861 	err = check_helper_mem_access(env, mem_reg, mem_argno, reg_umax(size_reg),
6862 				      access_type, zero_size_allowed, meta);
6863 	if (!err) {
6864 		int regno = reg_from_argno(size_argno);
6865 
6866 		if (regno >= 0)
6867 			err = mark_chain_precision(env, regno);
6868 		else
6869 			err = mark_stack_arg_precision(env, arg_idx_from_argno(size_argno));
6870 	}
6871 	return err;
6872 }
6873 
6874 static int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
6875 			 argno_t argno, u32 mem_size)
6876 {
6877 	bool may_be_null = type_may_be_null(reg->type);
6878 	struct bpf_reg_state saved_reg;
6879 	int err;
6880 
6881 	if (bpf_register_is_null(reg))
6882 		return 0;
6883 
6884 	if (mem_size > S32_MAX) {
6885 		verbose(env, "%s memory size %u is too large\n",
6886 			reg_arg_name(env, argno), mem_size);
6887 		return -EACCES;
6888 	}
6889 
6890 	/* Assuming that the register contains a value check if the memory
6891 	 * access is safe. Temporarily save and restore the register's state as
6892 	 * the conversion shouldn't be visible to a caller.
6893 	 */
6894 	if (may_be_null) {
6895 		saved_reg = *reg;
6896 		mark_ptr_not_null_reg(reg);
6897 	}
6898 
6899 	int size = base_type(reg->type) == PTR_TO_STACK ? -(int)mem_size : mem_size;
6900 
6901 	err = check_helper_mem_access(env, reg, argno, size, BPF_READ, true, NULL);
6902 	err = err ?: check_helper_mem_access(env, reg, argno, size, BPF_WRITE, true, NULL);
6903 
6904 	if (may_be_null)
6905 		*reg = saved_reg;
6906 
6907 	return err;
6908 }
6909 
6910 static int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *mem_reg,
6911 				    struct bpf_reg_state *size_reg, argno_t mem_argno, argno_t size_argno)
6912 {
6913 	bool may_be_null = type_may_be_null(mem_reg->type);
6914 	struct bpf_reg_state saved_reg;
6915 	struct bpf_call_arg_meta meta;
6916 	int err;
6917 
6918 	memset(&meta, 0, sizeof(meta));
6919 
6920 	if (may_be_null) {
6921 		saved_reg = *mem_reg;
6922 		mark_ptr_not_null_reg(mem_reg);
6923 	}
6924 
6925 	err = check_mem_size_reg(env, mem_reg, size_reg, mem_argno, size_argno, BPF_READ, true, &meta);
6926 	err = err ?: check_mem_size_reg(env, mem_reg, size_reg, mem_argno, size_argno, BPF_WRITE, true, &meta);
6927 
6928 	if (may_be_null)
6929 		*mem_reg = saved_reg;
6930 
6931 	return err;
6932 }
6933 
/* Flag bits for the @flags argument of process_spin_lock() */
enum {
	/* set: the call takes the lock; clear: the call releases it */
	PROCESS_SPIN_LOCK = 0x1,
	/* operate on the "bpf_res_spin" lock instead of "bpf_spin" */
	PROCESS_RES_LOCK  = 0x2,
	/* together with PROCESS_RES_LOCK, selects REF_TYPE_RES_LOCK_IRQ */
	PROCESS_LOCK_IRQ  = 0x4,
};
6939 
6940 /* Implementation details:
6941  * bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL.
6942  * bpf_obj_new returns PTR_TO_BTF_ID | MEM_ALLOC | PTR_MAYBE_NULL.
6943  * Two bpf_map_lookups (even with the same key) will have different reg->id.
6944  * Two separate bpf_obj_new will also have different reg->id.
6945  * For traditional PTR_TO_MAP_VALUE or PTR_TO_BTF_ID | MEM_ALLOC, the verifier
6946  * clears reg->id after value_or_null->value transition, since the verifier only
6947  * cares about the range of access to valid map value pointer and doesn't care
6948  * about actual address of the map element.
6949  * For maps with 'struct bpf_spin_lock' inside map value the verifier keeps
6950  * reg->id > 0 after value_or_null->value transition. By doing so
6951  * two bpf_map_lookups will be considered two different pointers that
6952  * point to different bpf_spin_locks. Likewise for pointers to allocated objects
6953  * returned from bpf_obj_new.
6954  * The verifier allows taking only one bpf_spin_lock at a time to avoid
6955  * dead-locks.
6956  * Since only one bpf_spin_lock is allowed the checks are simpler than
6957  * reg_is_refcounted() logic. The verifier needs to remember only
6958  * one spin_lock instead of array of acquired_refs.
6959  * env->cur_state->active_locks remembers which map value element or allocated
6960  * object got locked and clears it after bpf_spin_unlock.
6961  */
6962 static int process_spin_lock(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno, int flags)
6963 {
6964 	bool is_lock = flags & PROCESS_SPIN_LOCK, is_res_lock = flags & PROCESS_RES_LOCK;
6965 	const char *lock_str = is_res_lock ? "bpf_res_spin" : "bpf_spin";
6966 	struct bpf_verifier_state *cur = env->cur_state;
6967 	bool is_const = tnum_is_const(reg->var_off);
6968 	bool is_irq = flags & PROCESS_LOCK_IRQ;
6969 	u64 val = reg->var_off.value;
6970 	struct bpf_map *map = NULL;
6971 	struct btf *btf = NULL;
6972 	struct btf_record *rec;
6973 	u32 spin_lock_off;
6974 	int err;
6975 
6976 	if (!is_const) {
6977 		verbose(env,
6978 			"%s doesn't have constant offset. %s_lock has to be at the constant offset\n",
6979 			reg_arg_name(env, argno), lock_str);
6980 		return -EINVAL;
6981 	}
6982 	if (reg->type == PTR_TO_MAP_VALUE) {
6983 		map = reg->map_ptr;
6984 		if (!map->btf) {
6985 			verbose(env,
6986 				"map '%s' has to have BTF in order to use %s_lock\n",
6987 				map->name, lock_str);
6988 			return -EINVAL;
6989 		}
6990 	} else {
6991 		btf = reg->btf;
6992 	}
6993 
6994 	rec = reg_btf_record(reg);
6995 	if (!btf_record_has_field(rec, is_res_lock ? BPF_RES_SPIN_LOCK : BPF_SPIN_LOCK)) {
6996 		verbose(env, "%s '%s' has no valid %s_lock\n", map ? "map" : "local",
6997 			map ? map->name : "kptr", lock_str);
6998 		return -EINVAL;
6999 	}
7000 	spin_lock_off = is_res_lock ? rec->res_spin_lock_off : rec->spin_lock_off;
7001 	if (spin_lock_off != val) {
7002 		verbose(env, "off %lld doesn't point to 'struct %s_lock' that is at %d\n",
7003 			val, lock_str, spin_lock_off);
7004 		return -EINVAL;
7005 	}
7006 	if (is_lock) {
7007 		void *ptr;
7008 		int type;
7009 
7010 		if (map)
7011 			ptr = map;
7012 		else
7013 			ptr = btf;
7014 
7015 		if (!is_res_lock && cur->active_locks) {
7016 			if (find_lock_state(env->cur_state, REF_TYPE_LOCK, 0, NULL)) {
7017 				verbose(env,
7018 					"Locking two bpf_spin_locks are not allowed\n");
7019 				return -EINVAL;
7020 			}
7021 		} else if (is_res_lock && cur->active_locks) {
7022 			if (find_lock_state(env->cur_state, REF_TYPE_RES_LOCK | REF_TYPE_RES_LOCK_IRQ, reg->id, ptr)) {
7023 				verbose(env, "Acquiring the same lock again, AA deadlock detected\n");
7024 				return -EINVAL;
7025 			}
7026 		}
7027 
7028 		if (is_res_lock && is_irq)
7029 			type = REF_TYPE_RES_LOCK_IRQ;
7030 		else if (is_res_lock)
7031 			type = REF_TYPE_RES_LOCK;
7032 		else
7033 			type = REF_TYPE_LOCK;
7034 		err = acquire_lock_state(env, env->insn_idx, type, reg->id, ptr);
7035 		if (err < 0) {
7036 			verbose(env, "Failed to acquire lock state\n");
7037 			return err;
7038 		}
7039 	} else {
7040 		void *ptr;
7041 		int type;
7042 
7043 		if (map)
7044 			ptr = map;
7045 		else
7046 			ptr = btf;
7047 
7048 		if (!cur->active_locks) {
7049 			verbose(env, "%s_unlock without taking a lock\n", lock_str);
7050 			return -EINVAL;
7051 		}
7052 
7053 		if (is_res_lock && is_irq)
7054 			type = REF_TYPE_RES_LOCK_IRQ;
7055 		else if (is_res_lock)
7056 			type = REF_TYPE_RES_LOCK;
7057 		else
7058 			type = REF_TYPE_LOCK;
7059 		if (!find_lock_state(cur, type, reg->id, ptr)) {
7060 			verbose(env, "%s_unlock of different lock\n", lock_str);
7061 			return -EINVAL;
7062 		}
7063 		if (reg->id != cur->active_lock_id || ptr != cur->active_lock_ptr) {
7064 			verbose(env, "%s_unlock cannot be out of order\n", lock_str);
7065 			return -EINVAL;
7066 		}
7067 		if (release_lock_state(cur, type, reg->id, ptr)) {
7068 			verbose(env, "%s_unlock of different lock\n", lock_str);
7069 			return -EINVAL;
7070 		}
7071 
7072 		invalidate_non_owning_refs(env);
7073 	}
7074 	return 0;
7075 }
7076 
7077 /* Check if @regno is a pointer to a specific field in a map value */
7078 static int check_map_field_pointer(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno,
7079 				   enum btf_field_type field_type,
7080 				   struct bpf_map_desc *map_desc)
7081 {
7082 	bool is_const = tnum_is_const(reg->var_off);
7083 	struct bpf_map *map = reg->map_ptr;
7084 	u64 val = reg->var_off.value;
7085 	const char *struct_name = btf_field_type_name(field_type);
7086 	int field_off = -1;
7087 
7088 	if (!is_const) {
7089 		verbose(env,
7090 			"%s doesn't have constant offset. %s has to be at the constant offset\n",
7091 			reg_arg_name(env, argno), struct_name);
7092 		return -EINVAL;
7093 	}
7094 	if (!map->btf) {
7095 		verbose(env, "map '%s' has to have BTF in order to use %s\n", map->name,
7096 			struct_name);
7097 		return -EINVAL;
7098 	}
7099 	if (!btf_record_has_field(map->record, field_type)) {
7100 		verbose(env, "map '%s' has no valid %s\n", map->name, struct_name);
7101 		return -EINVAL;
7102 	}
7103 	switch (field_type) {
7104 	case BPF_TIMER:
7105 		field_off = map->record->timer_off;
7106 		break;
7107 	case BPF_TASK_WORK:
7108 		field_off = map->record->task_work_off;
7109 		break;
7110 	case BPF_WORKQUEUE:
7111 		field_off = map->record->wq_off;
7112 		break;
7113 	default:
7114 		verifier_bug(env, "unsupported BTF field type: %s\n", struct_name);
7115 		return -EINVAL;
7116 	}
7117 	if (field_off != val) {
7118 		verbose(env, "off %lld doesn't point to 'struct %s' that is at %d\n",
7119 			val, struct_name, field_off);
7120 		return -EINVAL;
7121 	}
7122 	if (map_desc->ptr) {
7123 		verifier_bug(env, "Two map pointers in a %s helper", struct_name);
7124 		return -EFAULT;
7125 	}
7126 	map_desc->uid = reg->map_uid;
7127 	map_desc->ptr = map;
7128 	return 0;
7129 }
7130 
7131 static int process_timer_func(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno,
7132 			      struct bpf_map_desc *map)
7133 {
7134 	if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
7135 		verbose(env, "bpf_timer cannot be used for PREEMPT_RT.\n");
7136 		return -EOPNOTSUPP;
7137 	}
7138 	return check_map_field_pointer(env, reg, argno, BPF_TIMER, map);
7139 }
7140 
7141 static int process_timer_helper(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno,
7142 				struct bpf_call_arg_meta *meta)
7143 {
7144 	return process_timer_func(env, reg, argno, &meta->map);
7145 }
7146 
7147 static int process_timer_kfunc(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno,
7148 			       struct bpf_kfunc_call_arg_meta *meta)
7149 {
7150 	return process_timer_func(env, reg, argno, &meta->map);
7151 }
7152 
7153 static int process_kptr_func(struct bpf_verifier_env *env, int regno,
7154 			     struct bpf_call_arg_meta *meta)
7155 {
7156 	struct bpf_reg_state *reg = reg_state(env, regno);
7157 	struct btf_field *kptr_field;
7158 	struct bpf_map *map_ptr;
7159 	struct btf_record *rec;
7160 	u32 kptr_off;
7161 
7162 	if (type_is_ptr_alloc_obj(reg->type)) {
7163 		rec = reg_btf_record(reg);
7164 	} else { /* PTR_TO_MAP_VALUE */
7165 		map_ptr = reg->map_ptr;
7166 		if (!map_ptr->btf) {
7167 			verbose(env, "map '%s' has to have BTF in order to use bpf_kptr_xchg\n",
7168 				map_ptr->name);
7169 			return -EINVAL;
7170 		}
7171 		rec = map_ptr->record;
7172 		meta->map.ptr = map_ptr;
7173 	}
7174 
7175 	if (!tnum_is_const(reg->var_off)) {
7176 		verbose(env,
7177 			"R%d doesn't have constant offset. kptr has to be at the constant offset\n",
7178 			regno);
7179 		return -EINVAL;
7180 	}
7181 
7182 	if (!btf_record_has_field(rec, BPF_KPTR)) {
7183 		verbose(env, "R%d has no valid kptr\n", regno);
7184 		return -EINVAL;
7185 	}
7186 
7187 	kptr_off = reg->var_off.value;
7188 	kptr_field = btf_record_find(rec, kptr_off, BPF_KPTR);
7189 	if (!kptr_field) {
7190 		verbose(env, "off=%d doesn't point to kptr\n", kptr_off);
7191 		return -EACCES;
7192 	}
7193 	if (kptr_field->type != BPF_KPTR_REF && kptr_field->type != BPF_KPTR_PERCPU) {
7194 		verbose(env, "off=%d kptr isn't referenced kptr\n", kptr_off);
7195 		return -EACCES;
7196 	}
7197 	meta->kptr_field = kptr_field;
7198 	return 0;
7199 }
7200 
7201 /*
7202  * Validate dynptr arguments for helper, kfunc and subprog.
7203  *
7204  * @dynptr is both input and output. It is populated when the argument is
7205  * tagged with MEM_UNINIT (i.e., the dynptr argument that will be constructed)
7206  * and consumed when the argument is expecting to be an initialized dynptr.
7207  * @parent_id is used to track the referenced parent object (e.g., file or skb in
7208  * qdisc program) when constructing a dynptr.
7209  *
7210  * There are two register types representing a bpf_dynptr, one is PTR_TO_STACK
7211  * which points to a stack slot, and the other is CONST_PTR_TO_DYNPTR.
7212  *
7213  * In both cases we deal with the first 8 bytes, but need to mark the next 8
7214  * bytes as STACK_DYNPTR in case of PTR_TO_STACK. In case of
7215  * CONST_PTR_TO_DYNPTR, we are guaranteed to get the beginning of the object.
7216  *
7217  * Mutability of bpf_dynptr is at two levels: the dynptr and the memory the
7218  * dynptr points to. At the first level, the verifier will make sure a
7219  * CONST_PTR_TO_DYNPTR cannot be reinitialized or destroyed. The mutability of
7220  * a dynptr's view (i.e., start and offset) is not tracked as there is not such
7221  * use case. The second level is tracked using the upper bit of bpf_dynptr->size
7222  * and checked dynamically during runtime.
7223  */
7224 static int process_dynptr_func(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
7225 			       argno_t argno, int insn_idx, enum bpf_arg_type arg_type,
7226 			       struct ref_obj_desc *ref_obj, struct bpf_dynptr_desc *dynptr)
7227 {
7228 	int spi, err = 0;
7229 
7230 	if (reg->type != PTR_TO_STACK && reg->type != CONST_PTR_TO_DYNPTR) {
7231 		verbose(env,
7232 			"%s expected pointer to stack or const struct bpf_dynptr\n",
7233 			reg_arg_name(env, argno));
7234 		return -EINVAL;
7235 	}
7236 
7237 	/*  MEM_UNINIT - Points to memory that is an appropriate candidate for
7238 	 *		 constructing a mutable bpf_dynptr object.
7239 	 *
7240 	 *		 Currently, this is only possible with PTR_TO_STACK
7241 	 *		 pointing to a region of at least 16 bytes which doesn't
7242 	 *		 contain an existing bpf_dynptr.
7243 	 *
7244 	 *  OBJ_RELEASE - Points to a initialized bpf_dynptr that will be
7245 	 *		  destroyed.
7246 	 *
7247 	 *  None       - Points to a initialized dynptr that cannot be
7248 	 *		 reinitialized or destroyed. However, the view of the
7249 	 *		 dynptr and the memory it points to may be mutated.
7250 	 */
7251 	if (arg_type & MEM_UNINIT) {
7252 		int i;
7253 
7254 		if (!is_dynptr_reg_valid_uninit(env, reg)) {
7255 			verbose(env, "Dynptr has to be an uninitialized dynptr\n");
7256 			return -EINVAL;
7257 		}
7258 
7259 		/* we write BPF_DW bits (8 bytes) at a time */
7260 		for (i = 0; i < BPF_DYNPTR_SIZE; i += 8) {
7261 			err = check_mem_access(env, insn_idx, reg, argno,
7262 					       i, BPF_DW, BPF_WRITE, -1, false, false);
7263 			if (err)
7264 				return err;
7265 		}
7266 
7267 		err = mark_stack_slots_dynptr(env, reg, arg_type, insn_idx, ref_obj, dynptr);
7268 	} else /* OBJ_RELEASE and None case from above */ {
7269 		/* For the reg->type == PTR_TO_STACK case, bpf_dynptr is never const */
7270 		if (reg->type == CONST_PTR_TO_DYNPTR && (arg_type & OBJ_RELEASE)) {
7271 			verbose(env, "CONST_PTR_TO_DYNPTR cannot be released\n");
7272 			return -EINVAL;
7273 		}
7274 
7275 		if (!is_dynptr_reg_valid_init(env, reg)) {
7276 			verbose(env, "Expected an initialized dynptr as %s\n",
7277 				reg_arg_name(env, argno));
7278 			return -EINVAL;
7279 		}
7280 
7281 		/* Fold modifiers (in this case, OBJ_RELEASE) when checking expected type */
7282 		if (!is_dynptr_type_expected(env, reg, arg_type & ~OBJ_RELEASE)) {
7283 			verbose(env,
7284 				"Expected a dynptr of type %s as %s\n",
7285 				dynptr_type_str(arg_to_dynptr_type(arg_type)),
7286 				reg_arg_name(env, argno));
7287 			return -EINVAL;
7288 		}
7289 
7290 		if (reg->type != CONST_PTR_TO_DYNPTR) {
7291 			struct bpf_func_state *state = bpf_func(env, reg);
7292 
7293 			spi = dynptr_get_spi(env, reg);
7294 			if (spi < 0)
7295 				return spi;
7296 
7297 			/*
7298 			 * For CONST_PTR_TO_DYNPTR, reg is already scratched by check_reg_arg
7299 			 * in check_helper_call and mark_btf_func_reg_size in check_kfunc_call.
7300 			 */
7301 			mark_stack_slots_scratched(env, spi, BPF_DYNPTR_NR_SLOTS);
7302 
7303 			reg = &state->stack[spi].spilled_ptr;
7304 		}
7305 
7306 		if (dynptr) {
7307 			dynptr->type = reg->dynptr.type;
7308 			dynptr->id = reg->id;
7309 			dynptr->parent_id = reg->parent_id;
7310 		}
7311 	}
7312 	return err;
7313 }
7314 
7315 static bool is_iter_kfunc(struct bpf_kfunc_call_arg_meta *meta)
7316 {
7317 	return meta->kfunc_flags & (KF_ITER_NEW | KF_ITER_NEXT | KF_ITER_DESTROY);
7318 }
7319 
7320 static bool is_iter_new_kfunc(struct bpf_kfunc_call_arg_meta *meta)
7321 {
7322 	return meta->kfunc_flags & KF_ITER_NEW;
7323 }
7324 
7325 
7326 static bool is_iter_destroy_kfunc(struct bpf_kfunc_call_arg_meta *meta)
7327 {
7328 	return meta->kfunc_flags & KF_ITER_DESTROY;
7329 }
7330 
7331 static bool is_kfunc_arg_iter(struct bpf_kfunc_call_arg_meta *meta, int arg_idx,
7332 			      const struct btf_param *arg)
7333 {
7334 	/* btf_check_iter_kfuncs() guarantees that first argument of any iter
7335 	 * kfunc is iter state pointer
7336 	 */
7337 	if (is_iter_kfunc(meta))
7338 		return arg_idx == 0;
7339 
7340 	/* iter passed as an argument to a generic kfunc */
7341 	return btf_param_match_suffix(meta->btf, arg, "__iter");
7342 }
7343 
7344 static int process_iter_arg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno, int insn_idx,
7345 			    struct bpf_kfunc_call_arg_meta *meta)
7346 {
7347 	struct bpf_func_state *state = bpf_func(env, reg);
7348 	const struct btf_type *t;
7349 	u32 arg_idx = arg_idx_from_argno(argno);
7350 	int spi, err, i, nr_slots, btf_id;
7351 
7352 	if (reg->type != PTR_TO_STACK) {
7353 		verbose(env, "%s expected pointer to an iterator on stack\n",
7354 			reg_arg_name(env, argno));
7355 		return -EINVAL;
7356 	}
7357 
7358 	/* For iter_{new,next,destroy} functions, btf_check_iter_kfuncs()
7359 	 * ensures struct convention, so we wouldn't need to do any BTF
7360 	 * validation here. But given iter state can be passed as a parameter
7361 	 * to any kfunc, if arg has "__iter" suffix, we need to be a bit more
7362 	 * conservative here.
7363 	 */
7364 	btf_id = btf_check_iter_arg(meta->btf, meta->func_proto, arg_idx);
7365 	if (btf_id < 0) {
7366 		verbose(env, "expected valid iter pointer as %s\n",
7367 			reg_arg_name(env, argno));
7368 		return -EINVAL;
7369 	}
7370 	t = btf_type_by_id(meta->btf, btf_id);
7371 	nr_slots = t->size / BPF_REG_SIZE;
7372 
7373 	if (is_iter_new_kfunc(meta)) {
7374 		/* bpf_iter_<type>_new() expects pointer to uninit iter state */
7375 		if (!is_iter_reg_valid_uninit(env, reg, nr_slots)) {
7376 			verbose(env, "expected uninitialized iter_%s as %s\n",
7377 				iter_type_str(meta->btf, btf_id), reg_arg_name(env, argno));
7378 			return -EINVAL;
7379 		}
7380 
7381 		for (i = 0; i < nr_slots * 8; i += BPF_REG_SIZE) {
7382 			err = check_mem_access(env, insn_idx, reg, argno,
7383 					       i, BPF_DW, BPF_WRITE, -1, false, false);
7384 			if (err)
7385 				return err;
7386 		}
7387 
7388 		err = mark_stack_slots_iter(env, meta, reg, insn_idx, meta->btf, btf_id, nr_slots);
7389 		if (err)
7390 			return err;
7391 	} else {
7392 		/* iter_next() or iter_destroy(), as well as any kfunc
7393 		 * accepting iter argument, expect initialized iter state
7394 		 */
7395 		err = is_iter_reg_valid_init(env, reg, meta->btf, btf_id, nr_slots);
7396 		switch (err) {
7397 		case 0:
7398 			break;
7399 		case -EINVAL:
7400 			verbose(env, "expected an initialized iter_%s as %s\n",
7401 				iter_type_str(meta->btf, btf_id), reg_arg_name(env, argno));
7402 			return err;
7403 		case -EPROTO:
7404 			verbose(env, "expected an RCU CS when using %s\n", meta->func_name);
7405 			return err;
7406 		default:
7407 			return err;
7408 		}
7409 
7410 		spi = iter_get_spi(env, reg, nr_slots);
7411 		if (spi < 0)
7412 			return spi;
7413 
7414 		mark_stack_slots_scratched(env, spi, nr_slots);
7415 
7416 		/* remember meta->iter info for process_iter_next_call() */
7417 		meta->iter.spi = spi;
7418 		meta->iter.frameno = reg->frameno;
7419 		update_ref_obj(&meta->ref_obj, &state->stack[spi].spilled_ptr);
7420 
7421 		if (is_iter_destroy_kfunc(meta)) {
7422 			err = unmark_stack_slots_iter(env, reg, nr_slots);
7423 			if (err)
7424 				return err;
7425 		}
7426 	}
7427 
7428 	return 0;
7429 }
7430 
7431 /* Look for a previous loop entry at insn_idx: nearest parent state
7432  * stopped at insn_idx with callsites matching those in cur->frame.
7433  */
7434 static struct bpf_verifier_state *find_prev_entry(struct bpf_verifier_env *env,
7435 						  struct bpf_verifier_state *cur,
7436 						  int insn_idx)
7437 {
7438 	struct bpf_verifier_state_list *sl;
7439 	struct bpf_verifier_state *st;
7440 	struct list_head *pos, *head;
7441 
7442 	/* Explored states are pushed in stack order, most recent states come first */
7443 	head = bpf_explored_state(env, insn_idx);
7444 	list_for_each(pos, head) {
7445 		sl = container_of(pos, struct bpf_verifier_state_list, node);
7446 		/* If st->branches != 0 state is a part of current DFS verification path,
7447 		 * hence cur & st for a loop.
7448 		 */
7449 		st = &sl->state;
7450 		if (st->insn_idx == insn_idx && st->branches && same_callsites(st, cur) &&
7451 		    st->dfs_depth < cur->dfs_depth)
7452 			return st;
7453 	}
7454 
7455 	return NULL;
7456 }
7457 
7458 /*
7459  * Check if scalar registers are exact for the purpose of not widening.
7460  * More lenient than regs_exact()
7461  */
7462 static bool scalars_exact_for_widen(const struct bpf_reg_state *rold,
7463 				    const struct bpf_reg_state *rcur)
7464 {
7465 	return !memcmp(rold, rcur, offsetof(struct bpf_reg_state, id));
7466 }
7467 
7468 static void maybe_widen_reg(struct bpf_verifier_env *env,
7469 			    struct bpf_reg_state *rold, struct bpf_reg_state *rcur)
7470 {
7471 	if (rold->type != SCALAR_VALUE)
7472 		return;
7473 	if (rold->type != rcur->type)
7474 		return;
7475 	if (rold->precise || rcur->precise || scalars_exact_for_widen(rold, rcur))
7476 		return;
7477 	__mark_reg_unknown(env, rcur);
7478 }
7479 
7480 static int widen_imprecise_scalars(struct bpf_verifier_env *env,
7481 				   struct bpf_verifier_state *old,
7482 				   struct bpf_verifier_state *cur)
7483 {
7484 	struct bpf_func_state *fold, *fcur;
7485 	int i, fr, num_slots;
7486 
7487 	for (fr = old->curframe; fr >= 0; fr--) {
7488 		fold = old->frame[fr];
7489 		fcur = cur->frame[fr];
7490 
7491 		for (i = 0; i < MAX_BPF_REG; i++)
7492 			maybe_widen_reg(env,
7493 					&fold->regs[i],
7494 					&fcur->regs[i]);
7495 
7496 		num_slots = min(fold->allocated_stack / BPF_REG_SIZE,
7497 				fcur->allocated_stack / BPF_REG_SIZE);
7498 		for (i = 0; i < num_slots; i++) {
7499 			if (!bpf_is_spilled_reg(&fold->stack[i]) ||
7500 			    !bpf_is_spilled_reg(&fcur->stack[i]))
7501 				continue;
7502 
7503 			maybe_widen_reg(env,
7504 					&fold->stack[i].spilled_ptr,
7505 					&fcur->stack[i].spilled_ptr);
7506 		}
7507 	}
7508 	return 0;
7509 }
7510 
7511 static struct bpf_reg_state *get_iter_from_state(struct bpf_verifier_state *cur_st,
7512 						 struct bpf_kfunc_call_arg_meta *meta)
7513 {
7514 	int iter_frameno = meta->iter.frameno;
7515 	int iter_spi = meta->iter.spi;
7516 
7517 	return &cur_st->frame[iter_frameno]->stack[iter_spi].spilled_ptr;
7518 }
7519 
7520 /* process_iter_next_call() is called when verifier gets to iterator's next
7521  * "method" (e.g., bpf_iter_num_next() for numbers iterator) call. We'll refer
7522  * to it as just "iter_next()" in comments below.
7523  *
7524  * BPF verifier relies on a crucial contract for any iter_next()
7525  * implementation: it should *eventually* return NULL, and once that happens
7526  * it should keep returning NULL. That is, once iterator exhausts elements to
7527  * iterate, it should never reset or spuriously return new elements.
7528  *
7529  * With the assumption of such contract, process_iter_next_call() simulates
7530  * a fork in the verifier state to validate loop logic correctness and safety
7531  * without having to simulate infinite amount of iterations.
7532  *
7533  * In current state, we first assume that iter_next() returned NULL and
7534  * iterator state is set to DRAINED (BPF_ITER_STATE_DRAINED). In such
7535  * conditions we should not form an infinite loop and should eventually reach
7536  * exit.
7537  *
7538  * Besides that, we also fork current state and enqueue it for later
7539  * verification. In a forked state we keep iterator state as ACTIVE
7540  * (BPF_ITER_STATE_ACTIVE) and assume non-NULL return from iter_next(). We
7541  * also bump iteration depth to prevent erroneous infinite loop detection
7542  * later on (see iter_active_depths_differ() comment for details). In this
7543  * state we assume that we'll eventually loop back to another iter_next()
7544  * calls (it could be in exactly same location or in some other instruction,
7545  * it doesn't matter, we don't make any unnecessary assumptions about this,
7546  * everything revolves around iterator state in a stack slot, not which
7547  * instruction is calling iter_next()). When that happens, we either will come
7548  * to iter_next() with equivalent state and can conclude that next iteration
7549  * will proceed in exactly the same way as we just verified, so it's safe to
7550  * assume that loop converges. If not, we'll go on another iteration
7551  * simulation with a different input state, until all possible starting states
7552  * are validated or we reach maximum number of instructions limit.
7553  *
7554  * This way, we will either exhaustively discover all possible input states
7555  * that iterator loop can start with and eventually will converge, or we'll
7556  * effectively regress into bounded loop simulation logic and either reach
7557  * maximum number of instructions if loop is not provably convergent, or there
7558  * is some statically known limit on number of iterations (e.g., if there is
7559  * an explicit `if n > 100 then break;` statement somewhere in the loop).
7560  *
7561  * Iteration convergence logic in is_state_visited() relies on exact
7562  * states comparison, which ignores read and precision marks.
7563  * This is necessary because read and precision marks are not finalized
7564  * while in the loop. Exact comparison might preclude convergence for
7565  * simple programs like below:
7566  *
7567  *     i = 0;
7568  *     while(iter_next(&it))
7569  *       i++;
7570  *
7571  * At each iteration step i++ would produce a new distinct state and
7572  * eventually instruction processing limit would be reached.
7573  *
7574  * To avoid such behavior speculatively forget (widen) range for
7575  * imprecise scalar registers, if those registers were not precise at the
7576  * end of the previous iteration and do not match exactly.
7577  *
7578  * This is a conservative heuristic that allows to verify wide range of programs,
7579  * however it precludes verification of programs that conjure an
7580  * imprecise value on the first loop iteration and use it as precise on a second.
7581  * For example, the following safe program would fail to verify:
7582  *
7583  *     struct bpf_num_iter it;
7584  *     int arr[10];
7585  *     int i = 0, a = 0;
7586  *     bpf_iter_num_new(&it, 0, 10);
7587  *     while (bpf_iter_num_next(&it)) {
7588  *       if (a == 0) {
7589  *         a = 1;
7590  *         i = 7; // Because i changed verifier would forget
 *                // its range on second loop entry.
7592  *       } else {
7593  *         arr[i] = 42; // This would fail to verify.
7594  *       }
7595  *     }
7596  *     bpf_iter_num_destroy(&it);
7597  */
7598 static int process_iter_next_call(struct bpf_verifier_env *env, int insn_idx,
7599 				  struct bpf_kfunc_call_arg_meta *meta)
7600 {
7601 	struct bpf_verifier_state *cur_st = env->cur_state, *queued_st, *prev_st;
7602 	struct bpf_func_state *cur_fr = cur_st->frame[cur_st->curframe], *queued_fr;
7603 	struct bpf_reg_state *cur_iter, *queued_iter;
7604 
7605 	BTF_TYPE_EMIT(struct bpf_iter);
7606 
7607 	cur_iter = get_iter_from_state(cur_st, meta);
7608 
7609 	if (cur_iter->iter.state != BPF_ITER_STATE_ACTIVE &&
7610 	    cur_iter->iter.state != BPF_ITER_STATE_DRAINED) {
7611 		verifier_bug(env, "unexpected iterator state %d (%s)",
7612 			     cur_iter->iter.state, iter_state_str(cur_iter->iter.state));
7613 		return -EFAULT;
7614 	}
7615 
7616 	if (cur_iter->iter.state == BPF_ITER_STATE_ACTIVE) {
7617 		/* Because iter_next() call is a checkpoint is_state_visitied()
7618 		 * should guarantee parent state with same call sites and insn_idx.
7619 		 */
7620 		if (!cur_st->parent || cur_st->parent->insn_idx != insn_idx ||
7621 		    !same_callsites(cur_st->parent, cur_st)) {
7622 			verifier_bug(env, "bad parent state for iter next call");
7623 			return -EFAULT;
7624 		}
7625 		/* Note cur_st->parent in the call below, it is necessary to skip
7626 		 * checkpoint created for cur_st by is_state_visited()
7627 		 * right at this instruction.
7628 		 */
7629 		prev_st = find_prev_entry(env, cur_st->parent, insn_idx);
7630 		/* branch out active iter state */
7631 		queued_st = push_stack(env, insn_idx + 1, insn_idx, false);
7632 		if (IS_ERR(queued_st))
7633 			return PTR_ERR(queued_st);
7634 
7635 		queued_iter = get_iter_from_state(queued_st, meta);
7636 		queued_iter->iter.state = BPF_ITER_STATE_ACTIVE;
7637 		queued_iter->iter.depth++;
7638 		if (prev_st)
7639 			widen_imprecise_scalars(env, prev_st, queued_st);
7640 
7641 		queued_fr = queued_st->frame[queued_st->curframe];
7642 		mark_ptr_not_null_reg(&queued_fr->regs[BPF_REG_0]);
7643 	}
7644 
7645 	/* switch to DRAINED state, but keep the depth unchanged */
7646 	/* mark current iter state as drained and assume returned NULL */
7647 	cur_iter->iter.state = BPF_ITER_STATE_DRAINED;
7648 	__mark_reg_const_zero(env, &cur_fr->regs[BPF_REG_0]);
7649 
7650 	return 0;
7651 }
7652 
7653 static bool arg_type_is_mem_size(enum bpf_arg_type type)
7654 {
7655 	return type == ARG_CONST_SIZE ||
7656 	       type == ARG_CONST_SIZE_OR_ZERO;
7657 }
7658 
7659 static bool arg_type_is_raw_mem(enum bpf_arg_type type)
7660 {
7661 	return base_type(type) == ARG_PTR_TO_MEM &&
7662 	       type & MEM_UNINIT;
7663 }
7664 
7665 static bool arg_type_is_release(enum bpf_arg_type type)
7666 {
7667 	return type & OBJ_RELEASE;
7668 }
7669 
7670 static bool arg_type_is_dynptr(enum bpf_arg_type type)
7671 {
7672 	return base_type(type) == ARG_PTR_TO_DYNPTR;
7673 }
7674 
7675 static int resolve_map_arg_type(struct bpf_verifier_env *env,
7676 				 const struct bpf_call_arg_meta *meta,
7677 				 enum bpf_arg_type *arg_type)
7678 {
7679 	if (!meta->map.ptr) {
7680 		/* kernel subsystem misconfigured verifier */
7681 		verifier_bug(env, "invalid map_ptr to access map->type");
7682 		return -EFAULT;
7683 	}
7684 
7685 	switch (meta->map.ptr->map_type) {
7686 	case BPF_MAP_TYPE_SOCKMAP:
7687 	case BPF_MAP_TYPE_SOCKHASH:
7688 		if (*arg_type == ARG_PTR_TO_MAP_VALUE) {
7689 			*arg_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON;
7690 		} else {
7691 			verbose(env, "invalid arg_type for sockmap/sockhash\n");
7692 			return -EINVAL;
7693 		}
7694 		break;
7695 	case BPF_MAP_TYPE_BLOOM_FILTER:
7696 		if (meta->func_id == BPF_FUNC_map_peek_elem)
7697 			*arg_type = ARG_PTR_TO_MAP_VALUE;
7698 		break;
7699 	default:
7700 		break;
7701 	}
7702 	return 0;
7703 }
7704 
/* Set of register types that are acceptable for one helper argument type.
 *
 * @types:  accepted register types. check_reg_type() scans the array and
 *          stops at the first NOT_INIT entry or at the end of the array.
 * @btf_id: optional BTF ID; check_reg_type() falls back to it for
 *          PTR_TO_BTF_ID registers when the caller passes no arg_btf_id.
 */
struct bpf_reg_types {
	const enum bpf_reg_type types[10];
	u32 *btf_id;
};
7709 
/* Register types accepted for ARG_PTR_TO_SOCK_COMMON (see compatible_reg_types[]). */
static const struct bpf_reg_types sock_types = {
	.types = {
		PTR_TO_SOCK_COMMON,
		PTR_TO_SOCKET,
		PTR_TO_TCP_SOCK,
		PTR_TO_XDP_SOCK,
	},
};
7718 
#ifdef CONFIG_NET
/* Register types accepted for ARG_PTR_TO_BTF_ID_SOCK_COMMON (see
 * compatible_reg_types[]): the socket pointer types plus plain and trusted
 * PTR_TO_BTF_ID. .btf_id supplies the BTF ID that check_reg_type() matches
 * PTR_TO_BTF_ID registers against when the helper passes none.
 */
static const struct bpf_reg_types btf_id_sock_common_types = {
	.types = {
		PTR_TO_SOCK_COMMON,
		PTR_TO_SOCKET,
		PTR_TO_TCP_SOCK,
		PTR_TO_XDP_SOCK,
		PTR_TO_BTF_ID,
		PTR_TO_BTF_ID | PTR_TRUSTED,
	},
	.btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
};
#endif
7732 
/* Register types accepted for ARG_PTR_TO_MAP_KEY, ARG_PTR_TO_MAP_VALUE and
 * ARG_PTR_TO_MEM (see compatible_reg_types[]).
 *
 * Note: all 10 slots of .types are in use, so there is no NOT_INIT
 * terminator here; check_reg_type() also bounds its scan with ARRAY_SIZE().
 * Adding an entry requires growing bpf_reg_types::types.
 */
static const struct bpf_reg_types mem_types = {
	.types = {
		PTR_TO_STACK,
		PTR_TO_PACKET,
		PTR_TO_PACKET_META,
		PTR_TO_MAP_KEY,
		PTR_TO_MAP_VALUE,
		PTR_TO_MEM,
		PTR_TO_MEM | MEM_RINGBUF,
		PTR_TO_BUF,
		PTR_TO_BTF_ID | PTR_TRUSTED,
		PTR_TO_CTX,
	},
};
7747 
/* Register types accepted for ARG_PTR_TO_SPIN_LOCK (see compatible_reg_types[]). */
static const struct bpf_reg_types spin_lock_types = {
	.types = {
		PTR_TO_MAP_VALUE,
		PTR_TO_BTF_ID | MEM_ALLOC,
	}
};
7754 
/* Remaining per-argument-type register sets. Each table is referenced from
 * compatible_reg_types[] under the argument type named in its comment.
 */
/* ARG_PTR_TO_SOCKET */
static const struct bpf_reg_types fullsock_types = { .types = { PTR_TO_SOCKET } };
/* ARG_CONST_SIZE, ARG_CONST_SIZE_OR_ZERO, ARG_CONST_ALLOC_SIZE_OR_ZERO */
static const struct bpf_reg_types scalar_types = { .types = { SCALAR_VALUE } };
/* ARG_PTR_TO_CTX */
static const struct bpf_reg_types context_types = { .types = { PTR_TO_CTX } };
/* ARG_PTR_TO_RINGBUF_MEM */
static const struct bpf_reg_types ringbuf_mem_types = { .types = { PTR_TO_MEM | MEM_RINGBUF } };
/* ARG_CONST_MAP_PTR */
static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_TO_MAP } };
/* ARG_PTR_TO_BTF_ID */
static const struct bpf_reg_types btf_ptr_types = {
	.types = {
		PTR_TO_BTF_ID,
		PTR_TO_BTF_ID | PTR_TRUSTED,
		PTR_TO_BTF_ID | MEM_RCU,
	},
};
/* ARG_PTR_TO_PERCPU_BTF_ID */
static const struct bpf_reg_types percpu_btf_ptr_types = {
	.types = {
		PTR_TO_BTF_ID | MEM_PERCPU,
		PTR_TO_BTF_ID | MEM_PERCPU | MEM_RCU,
		PTR_TO_BTF_ID | MEM_PERCPU | PTR_TRUSTED,
	}
};
/* ARG_PTR_TO_FUNC */
static const struct bpf_reg_types func_ptr_types = { .types = { PTR_TO_FUNC } };
/* ARG_PTR_TO_STACK */
static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } };
/* ARG_PTR_TO_CONST_STR */
static const struct bpf_reg_types const_str_ptr_types = { .types = { PTR_TO_MAP_VALUE } };
/* ARG_PTR_TO_TIMER */
static const struct bpf_reg_types timer_types = { .types = { PTR_TO_MAP_VALUE } };
/* ARG_KPTR_XCHG_DEST */
static const struct bpf_reg_types kptr_xchg_dest_types = {
	.types = {
		PTR_TO_MAP_VALUE,
		PTR_TO_BTF_ID | MEM_ALLOC,
		PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF,
		PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF | MEM_RCU,
	}
};
/* ARG_PTR_TO_DYNPTR */
static const struct bpf_reg_types dynptr_types = {
	.types = {
		PTR_TO_STACK,
		CONST_PTR_TO_DYNPTR,
	}
};
7792 
/* Maps a base helper argument type to the set of register types that
 * check_reg_type() accepts for it. Argument types without an entry are
 * NULL, which check_reg_type() reports as a verifier bug.
 */
static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
	[ARG_PTR_TO_MAP_KEY]		= &mem_types,
	[ARG_PTR_TO_MAP_VALUE]		= &mem_types,
	[ARG_CONST_SIZE]		= &scalar_types,
	[ARG_CONST_SIZE_OR_ZERO]	= &scalar_types,
	[ARG_CONST_ALLOC_SIZE_OR_ZERO]	= &scalar_types,
	[ARG_CONST_MAP_PTR]		= &const_map_ptr_types,
	[ARG_PTR_TO_CTX]		= &context_types,
	[ARG_PTR_TO_SOCK_COMMON]	= &sock_types,
#ifdef CONFIG_NET
	/* table only exists when networking is built in */
	[ARG_PTR_TO_BTF_ID_SOCK_COMMON]	= &btf_id_sock_common_types,
#endif
	[ARG_PTR_TO_SOCKET]		= &fullsock_types,
	[ARG_PTR_TO_BTF_ID]		= &btf_ptr_types,
	[ARG_PTR_TO_SPIN_LOCK]		= &spin_lock_types,
	[ARG_PTR_TO_MEM]		= &mem_types,
	[ARG_PTR_TO_RINGBUF_MEM]	= &ringbuf_mem_types,
	[ARG_PTR_TO_PERCPU_BTF_ID]	= &percpu_btf_ptr_types,
	[ARG_PTR_TO_FUNC]		= &func_ptr_types,
	[ARG_PTR_TO_STACK]		= &stack_ptr_types,
	[ARG_PTR_TO_CONST_STR]		= &const_str_ptr_types,
	[ARG_PTR_TO_TIMER]		= &timer_types,
	[ARG_KPTR_XCHG_DEST]		= &kptr_xchg_dest_types,
	[ARG_PTR_TO_DYNPTR]		= &dynptr_types,
};
7818 
7819 static int check_reg_type(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno,
7820 			  enum bpf_arg_type arg_type,
7821 			  const u32 *arg_btf_id,
7822 			  struct bpf_call_arg_meta *meta)
7823 {
7824 	enum bpf_reg_type expected, type = reg->type;
7825 	const struct bpf_reg_types *compatible;
7826 	int i, j, err;
7827 
7828 	compatible = compatible_reg_types[base_type(arg_type)];
7829 	if (!compatible) {
7830 		verifier_bug(env, "unsupported arg type %d", arg_type);
7831 		return -EFAULT;
7832 	}
7833 
7834 	/* ARG_PTR_TO_MEM + RDONLY is compatible with PTR_TO_MEM and PTR_TO_MEM + RDONLY,
7835 	 * but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM and NOT with PTR_TO_MEM + RDONLY
7836 	 *
7837 	 * Same for MAYBE_NULL:
7838 	 *
7839 	 * ARG_PTR_TO_MEM + MAYBE_NULL is compatible with PTR_TO_MEM and PTR_TO_MEM + MAYBE_NULL,
7840 	 * but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM but NOT with PTR_TO_MEM + MAYBE_NULL
7841 	 *
7842 	 * ARG_PTR_TO_MEM is compatible with PTR_TO_MEM that is tagged with a dynptr type.
7843 	 *
7844 	 * Therefore we fold these flags depending on the arg_type before comparison.
7845 	 */
7846 	if (arg_type & MEM_RDONLY)
7847 		type &= ~MEM_RDONLY;
7848 	if (arg_type & PTR_MAYBE_NULL)
7849 		type &= ~PTR_MAYBE_NULL;
7850 	if (base_type(arg_type) == ARG_PTR_TO_MEM)
7851 		type &= ~DYNPTR_TYPE_FLAG_MASK;
7852 
7853 	/* Local kptr types are allowed as the source argument of bpf_kptr_xchg */
7854 	if (meta->func_id == BPF_FUNC_kptr_xchg && type_is_alloc(type) && reg_from_argno(argno) == BPF_REG_2) {
7855 		type &= ~MEM_ALLOC;
7856 		type &= ~MEM_PERCPU;
7857 	}
7858 
7859 	for (i = 0; i < ARRAY_SIZE(compatible->types); i++) {
7860 		expected = compatible->types[i];
7861 		if (expected == NOT_INIT)
7862 			break;
7863 
7864 		if (type == expected)
7865 			goto found;
7866 	}
7867 
7868 	verbose(env, "%s type=%s expected=", reg_arg_name(env, argno), reg_type_str(env, reg->type));
7869 	for (j = 0; j + 1 < i; j++)
7870 		verbose(env, "%s, ", reg_type_str(env, compatible->types[j]));
7871 	verbose(env, "%s\n", reg_type_str(env, compatible->types[j]));
7872 	return -EACCES;
7873 
7874 found:
7875 	if (base_type(reg->type) != PTR_TO_BTF_ID)
7876 		return 0;
7877 
7878 	if (compatible == &mem_types) {
7879 		if (!(arg_type & MEM_RDONLY)) {
7880 			verbose(env,
7881 				"%s() may write into memory pointed by %s type=%s\n",
7882 				func_id_name(meta->func_id),
7883 				reg_arg_name(env, argno), reg_type_str(env, reg->type));
7884 			return -EACCES;
7885 		}
7886 		return 0;
7887 	}
7888 
7889 	switch ((int)reg->type) {
7890 	case PTR_TO_BTF_ID:
7891 	case PTR_TO_BTF_ID | PTR_TRUSTED:
7892 	case PTR_TO_BTF_ID | PTR_TRUSTED | PTR_MAYBE_NULL:
7893 	case PTR_TO_BTF_ID | MEM_RCU:
7894 	case PTR_TO_BTF_ID | PTR_MAYBE_NULL:
7895 	case PTR_TO_BTF_ID | PTR_MAYBE_NULL | MEM_RCU:
7896 	{
7897 		/* For bpf_sk_release, it needs to match against first member
7898 		 * 'struct sock_common', hence make an exception for it. This
7899 		 * allows bpf_sk_release to work for multiple socket types.
7900 		 */
7901 		bool strict_type_match = arg_type_is_release(arg_type) &&
7902 					 meta->func_id != BPF_FUNC_sk_release;
7903 
7904 		if (type_may_be_null(reg->type) &&
7905 		    (!type_may_be_null(arg_type) || arg_type_is_release(arg_type))) {
7906 			verbose(env, "Possibly NULL pointer passed to helper %s\n",
7907 				reg_arg_name(env, argno));
7908 			return -EACCES;
7909 		}
7910 
7911 		if (!arg_btf_id) {
7912 			if (!compatible->btf_id) {
7913 				verifier_bug(env, "missing arg compatible BTF ID");
7914 				return -EFAULT;
7915 			}
7916 			arg_btf_id = compatible->btf_id;
7917 		}
7918 
7919 		if (meta->func_id == BPF_FUNC_kptr_xchg) {
7920 			if (map_kptr_match_type(env, meta->kptr_field, reg, reg_from_argno(argno)))
7921 				return -EACCES;
7922 		} else {
7923 			if (arg_btf_id == BPF_PTR_POISON) {
7924 				verbose(env, "verifier internal error:");
7925 				verbose(env, "%s has non-overwritten BPF_PTR_POISON type\n",
7926 					reg_arg_name(env, argno));
7927 				return -EACCES;
7928 			}
7929 
7930 			err = __check_ptr_off_reg(env, reg, argno, true);
7931 			if (err)
7932 				return err;
7933 
7934 			if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id,
7935 						  reg->var_off.value, btf_vmlinux, *arg_btf_id,
7936 						  strict_type_match)) {
7937 				verbose(env, "%s is of type %s but %s is expected\n",
7938 					reg_arg_name(env, argno),
7939 					btf_type_name(reg->btf, reg->btf_id),
7940 					btf_type_name(btf_vmlinux, *arg_btf_id));
7941 				return -EACCES;
7942 			}
7943 		}
7944 		break;
7945 	}
7946 	case PTR_TO_BTF_ID | MEM_ALLOC:
7947 	case PTR_TO_BTF_ID | MEM_PERCPU | MEM_ALLOC:
7948 	case PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF:
7949 	case PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF | MEM_RCU:
7950 		if (meta->func_id != BPF_FUNC_spin_lock && meta->func_id != BPF_FUNC_spin_unlock &&
7951 		    meta->func_id != BPF_FUNC_kptr_xchg) {
7952 			verifier_bug(env, "unimplemented handling of MEM_ALLOC");
7953 			return -EFAULT;
7954 		}
7955 		/* Check if local kptr in src arg matches kptr in dst arg */
7956 		if (meta->func_id == BPF_FUNC_kptr_xchg) {
7957 			int regno = reg_from_argno(argno);
7958 
7959 			if (regno == BPF_REG_2 &&
7960 			    map_kptr_match_type(env, meta->kptr_field, reg, regno))
7961 				return -EACCES;
7962 		}
7963 		break;
7964 	case PTR_TO_BTF_ID | MEM_PERCPU:
7965 	case PTR_TO_BTF_ID | MEM_PERCPU | MEM_RCU:
7966 	case PTR_TO_BTF_ID | MEM_PERCPU | PTR_TRUSTED:
7967 		/* Handled by helper specific checks */
7968 		break;
7969 	default:
7970 		verifier_bug(env, "invalid PTR_TO_BTF_ID register for type match");
7971 		return -EFAULT;
7972 	}
7973 	return 0;
7974 }
7975 
7976 static struct btf_field *
7977 reg_find_field_offset(const struct bpf_reg_state *reg, s32 off, u32 fields)
7978 {
7979 	struct btf_field *field;
7980 	struct btf_record *rec;
7981 
7982 	rec = reg_btf_record(reg);
7983 	if (!rec)
7984 		return NULL;
7985 
7986 	field = btf_record_find(rec, off, fields);
7987 	if (!field)
7988 		return NULL;
7989 
7990 	return field;
7991 }
7992 
7993 static int check_func_arg_reg_off(struct bpf_verifier_env *env,
7994 				  const struct bpf_reg_state *reg, argno_t argno,
7995 				  enum bpf_arg_type arg_type)
7996 {
7997 	u32 type = reg->type;
7998 
7999 	/* When referenced register is passed to release function, its fixed
8000 	 * offset must be 0.
8001 	 *
8002 	 * We will check arg_type_is_release reg has id when storing
8003 	 * meta->release_regno.
8004 	 */
8005 	if (arg_type_is_release(arg_type)) {
8006 		/* ARG_PTR_TO_DYNPTR with OBJ_RELEASE is a bit special, as it
8007 		 * may not directly point to the object being released, but to
8008 		 * dynptr pointing to such object, which might be at some offset
8009 		 * on the stack. In that case, we simply to fallback to the
8010 		 * default handling.
8011 		 */
8012 		if (arg_type_is_dynptr(arg_type) && type == PTR_TO_STACK)
8013 			return 0;
8014 
8015 		/* Doing check_ptr_off_reg check for the offset will catch this
8016 		 * because fixed_off_ok is false, but checking here allows us
8017 		 * to give the user a better error message.
8018 		 */
8019 		if (!tnum_is_const(reg->var_off) || reg->var_off.value != 0) {
8020 			verbose(env, "%s must have zero offset when passed to release func or trusted arg to kfunc\n",
8021 				reg_arg_name(env, argno));
8022 			return -EINVAL;
8023 		}
8024 	}
8025 
8026 	switch (type) {
8027 	/* Pointer types where both fixed and variable offset is explicitly allowed: */
8028 	case PTR_TO_STACK:
8029 	case PTR_TO_PACKET:
8030 	case PTR_TO_PACKET_META:
8031 	case PTR_TO_MAP_KEY:
8032 	case PTR_TO_MAP_VALUE:
8033 	case PTR_TO_MEM:
8034 	case PTR_TO_MEM | MEM_RDONLY:
8035 	case PTR_TO_MEM | MEM_RINGBUF:
8036 	case PTR_TO_BUF:
8037 	case PTR_TO_BUF | MEM_RDONLY:
8038 	case PTR_TO_ARENA:
8039 	case SCALAR_VALUE:
8040 		return 0;
8041 	/* All the rest must be rejected, except PTR_TO_BTF_ID which allows
8042 	 * fixed offset.
8043 	 */
8044 	case PTR_TO_BTF_ID:
8045 	case PTR_TO_BTF_ID | MEM_ALLOC:
8046 	case PTR_TO_BTF_ID | PTR_TRUSTED:
8047 	case PTR_TO_BTF_ID | MEM_RCU:
8048 	case PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF:
8049 	case PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF | MEM_RCU:
8050 		/* When referenced PTR_TO_BTF_ID is passed to release function,
8051 		 * its fixed offset must be 0. In the other cases, fixed offset
8052 		 * can be non-zero. This was already checked above. So pass
8053 		 * fixed_off_ok as true to allow fixed offset for all other
8054 		 * cases. var_off always must be 0 for PTR_TO_BTF_ID, hence we
8055 		 * still need to do checks instead of returning.
8056 		 */
8057 		return __check_ptr_off_reg(env, reg, argno, true);
8058 	case PTR_TO_CTX:
8059 		/*
8060 		 * Allow fixed and variable offsets for syscall context, but
8061 		 * only when the argument is passed as memory, not ctx,
8062 		 * otherwise we may get modified ctx in tail called programs and
8063 		 * global subprogs (that may act as extension prog hooks).
8064 		 */
8065 		if (arg_type != ARG_PTR_TO_CTX && is_var_ctx_off_allowed(env->prog))
8066 			return 0;
8067 		fallthrough;
8068 	default:
8069 		return __check_ptr_off_reg(env, reg, argno, false);
8070 	}
8071 }
8072 
8073 static int check_arg_const_str(struct bpf_verifier_env *env,
8074 			       struct bpf_reg_state *reg, argno_t argno)
8075 {
8076 	struct bpf_map *map = reg->map_ptr;
8077 	int err;
8078 	int map_off;
8079 	u64 map_addr;
8080 	char *str_ptr;
8081 
8082 	if (reg->type != PTR_TO_MAP_VALUE)
8083 		return -EINVAL;
8084 
8085 	if (map->map_type == BPF_MAP_TYPE_INSN_ARRAY) {
8086 		verbose(env, "%s points to insn_array map which cannot be used as const string\n",
8087 			reg_arg_name(env, argno));
8088 		return -EACCES;
8089 	}
8090 
8091 	if (!bpf_map_is_rdonly(map)) {
8092 		verbose(env, "%s does not point to a readonly map'\n", reg_arg_name(env, argno));
8093 		return -EACCES;
8094 	}
8095 
8096 	if (!tnum_is_const(reg->var_off)) {
8097 		verbose(env, "%s is not a constant address'\n", reg_arg_name(env, argno));
8098 		return -EACCES;
8099 	}
8100 
8101 	if (!map->ops->map_direct_value_addr) {
8102 		verbose(env, "no direct value access support for this map type\n");
8103 		return -EACCES;
8104 	}
8105 
8106 	err = check_map_access(env, reg, argno, 0,
8107 			       map->value_size - reg->var_off.value, false,
8108 			       ACCESS_HELPER);
8109 	if (err)
8110 		return err;
8111 
8112 	map_off = reg->var_off.value;
8113 	err = map->ops->map_direct_value_addr(map, &map_addr, map_off);
8114 	if (err) {
8115 		verbose(env, "direct value access on string failed\n");
8116 		return err;
8117 	}
8118 
8119 	str_ptr = (char *)(long)(map_addr);
8120 	if (!strnchr(str_ptr + map_off, map->value_size - map_off, 0)) {
8121 		verbose(env, "string is not zero-terminated\n");
8122 		return -EINVAL;
8123 	}
8124 	return 0;
8125 }
8126 
8127 /* Returns constant key value in `value` if possible, else negative error */
8128 static int get_constant_map_key(struct bpf_verifier_env *env,
8129 				struct bpf_reg_state *key,
8130 				u32 key_size,
8131 				s64 *value)
8132 {
8133 	struct bpf_func_state *state = bpf_func(env, key);
8134 	struct bpf_reg_state *reg;
8135 	int slot, spi, off;
8136 	int spill_size = 0;
8137 	int zero_size = 0;
8138 	int stack_off;
8139 	int i, err;
8140 	u8 *stype;
8141 
8142 	if (!env->bpf_capable)
8143 		return -EOPNOTSUPP;
8144 	if (key->type != PTR_TO_STACK)
8145 		return -EOPNOTSUPP;
8146 	if (!tnum_is_const(key->var_off))
8147 		return -EOPNOTSUPP;
8148 
8149 	stack_off = key->var_off.value;
8150 	slot = -stack_off - 1;
8151 	spi = slot / BPF_REG_SIZE;
8152 	off = slot % BPF_REG_SIZE;
8153 	stype = state->stack[spi].slot_type;
8154 
8155 	/* First handle precisely tracked STACK_ZERO */
8156 	for (i = off; i >= 0 && stype[i] == STACK_ZERO; i--)
8157 		zero_size++;
8158 	if (zero_size >= key_size) {
8159 		*value = 0;
8160 		return 0;
8161 	}
8162 
8163 	/* Check that stack contains a scalar spill of expected size */
8164 	if (!bpf_is_spilled_scalar_reg(&state->stack[spi]))
8165 		return -EOPNOTSUPP;
8166 	for (i = off; i >= 0 && stype[i] == STACK_SPILL; i--)
8167 		spill_size++;
8168 	if (spill_size != key_size)
8169 		return -EOPNOTSUPP;
8170 
8171 	reg = &state->stack[spi].spilled_ptr;
8172 	if (!tnum_is_const(reg->var_off))
8173 		/* Stack value not statically known */
8174 		return -EOPNOTSUPP;
8175 
8176 	/* We are relying on a constant value. So mark as precise
8177 	 * to prevent pruning on it.
8178 	 */
8179 	bpf_bt_set_frame_slot(&env->bt, key->frameno, spi);
8180 	err = mark_chain_precision_batch(env, env->cur_state);
8181 	if (err < 0)
8182 		return err;
8183 
8184 	*value = reg->var_off.value;
8185 	return 0;
8186 }
8187 
8188 static bool can_elide_value_nullness(const struct bpf_map *map);
8189 
8190 static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
8191 			  struct bpf_call_arg_meta *meta,
8192 			  const struct bpf_func_proto *fn,
8193 			  int insn_idx)
8194 {
8195 	u32 regno = BPF_REG_1 + arg;
8196 	struct bpf_reg_state *reg = reg_state(env, regno);
8197 	enum bpf_arg_type arg_type = fn->arg_type[arg];
8198 	argno_t argno = argno_from_arg(arg + 1);
8199 	enum bpf_reg_type type = reg->type;
8200 	u32 *arg_btf_id = NULL;
8201 	u32 key_size;
8202 	int err = 0;
8203 
8204 	if (arg_type == ARG_DONTCARE)
8205 		return 0;
8206 
8207 	err = check_reg_arg(env, regno, SRC_OP);
8208 	if (err)
8209 		return err;
8210 
8211 	if (arg_type == ARG_ANYTHING) {
8212 		if (is_pointer_value(env, regno)) {
8213 			verbose(env, "R%d leaks addr into helper function\n",
8214 				regno);
8215 			return -EACCES;
8216 		}
8217 		return 0;
8218 	}
8219 
8220 	if (type_is_pkt_pointer(type) &&
8221 	    !may_access_direct_pkt_data(env, meta, BPF_READ)) {
8222 		verbose(env, "helper access to the packet is not allowed\n");
8223 		return -EACCES;
8224 	}
8225 
8226 	if (base_type(arg_type) == ARG_PTR_TO_MAP_VALUE) {
8227 		err = resolve_map_arg_type(env, meta, &arg_type);
8228 		if (err)
8229 			return err;
8230 	}
8231 
8232 	if (bpf_register_is_null(reg) && type_may_be_null(arg_type))
8233 		/* A NULL register has a SCALAR_VALUE type, so skip
8234 		 * type checking.
8235 		 */
8236 		goto skip_type_check;
8237 
8238 	/* arg_btf_id and arg_size are in a union. */
8239 	if (base_type(arg_type) == ARG_PTR_TO_BTF_ID ||
8240 	    base_type(arg_type) == ARG_PTR_TO_SPIN_LOCK)
8241 		arg_btf_id = fn->arg_btf_id[arg];
8242 
8243 	err = check_reg_type(env, reg, argno_from_reg(regno), arg_type, arg_btf_id, meta);
8244 	if (err)
8245 		return err;
8246 
8247 	err = check_func_arg_reg_off(env, reg, argno_from_reg(regno), arg_type);
8248 	if (err)
8249 		return err;
8250 
8251 skip_type_check:
8252 	if (arg_type_is_release(arg_type) && !arg_type_is_dynptr(arg_type) &&
8253 	    !reg_is_referenced(env, reg) && !bpf_register_is_null(reg)) {
8254 		verbose(env, "release helper %s expects referenced PTR_TO_BTF_ID passed to %s\n",
8255 			func_id_name(meta->func_id), reg_arg_name(env, argno));
8256 		return -EINVAL;
8257 	}
8258 
8259 	if (reg_is_referenced(env, reg))
8260 		update_ref_obj(&meta->ref_obj, reg);
8261 
8262 	switch (base_type(arg_type)) {
8263 	case ARG_CONST_MAP_PTR:
8264 		/* bpf_map_xxx(map_ptr) call: remember that map_ptr */
8265 		if (meta->map.ptr) {
8266 			/* Use map_uid (which is unique id of inner map) to reject:
8267 			 * inner_map1 = bpf_map_lookup_elem(outer_map, key1)
8268 			 * inner_map2 = bpf_map_lookup_elem(outer_map, key2)
8269 			 * if (inner_map1 && inner_map2) {
8270 			 *     timer = bpf_map_lookup_elem(inner_map1);
8271 			 *     if (timer)
8272 			 *         // mismatch would have been allowed
8273 			 *         bpf_timer_init(timer, inner_map2);
8274 			 * }
8275 			 *
8276 			 * Comparing map_ptr is enough to distinguish normal and outer maps.
8277 			 */
8278 			if (meta->map.ptr != reg->map_ptr ||
8279 			    meta->map.uid != reg->map_uid) {
8280 				verbose(env,
8281 					"timer pointer in R1 map_uid=%d doesn't match map pointer in R2 map_uid=%d\n",
8282 					meta->map.uid, reg->map_uid);
8283 				return -EINVAL;
8284 			}
8285 		}
8286 		meta->map.ptr = reg->map_ptr;
8287 		meta->map.uid = reg->map_uid;
8288 		break;
8289 	case ARG_PTR_TO_MAP_KEY:
8290 		/* bpf_map_xxx(..., map_ptr, ..., key) call:
8291 		 * check that [key, key + map->key_size) are within
8292 		 * stack limits and initialized
8293 		 */
8294 		if (!meta->map.ptr) {
8295 			/* in function declaration map_ptr must come before
8296 			 * map_key, so that it's verified and known before
8297 			 * we have to check map_key here. Otherwise it means
8298 			 * that kernel subsystem misconfigured verifier
8299 			 */
8300 			verifier_bug(env, "invalid map_ptr to access map->key");
8301 			return -EFAULT;
8302 		}
8303 		key_size = meta->map.ptr->key_size;
8304 		err = check_helper_mem_access(env, reg, argno_from_reg(regno), key_size, BPF_READ, false, NULL);
8305 		if (err)
8306 			return err;
8307 		if (can_elide_value_nullness(meta->map.ptr)) {
8308 			err = get_constant_map_key(env, reg, key_size, &meta->const_map_key);
8309 			if (err < 0) {
8310 				meta->const_map_key = -1;
8311 				if (err == -EOPNOTSUPP)
8312 					err = 0;
8313 				else
8314 					return err;
8315 			}
8316 		}
8317 		break;
8318 	case ARG_PTR_TO_MAP_VALUE:
8319 		if (type_may_be_null(arg_type) && bpf_register_is_null(reg))
8320 			return 0;
8321 
8322 		/* bpf_map_xxx(..., map_ptr, ..., value) call:
8323 		 * check [value, value + map->value_size) validity
8324 		 */
8325 		if (!meta->map.ptr) {
8326 			/* kernel subsystem misconfigured verifier */
8327 			verifier_bug(env, "invalid map_ptr to access map->value");
8328 			return -EFAULT;
8329 		}
8330 		meta->raw_mode = arg_type & MEM_UNINIT;
8331 		err = check_helper_mem_access(env, reg, argno_from_reg(regno), meta->map.ptr->value_size,
8332 					      arg_type & MEM_WRITE ? BPF_WRITE : BPF_READ,
8333 					      false, meta);
8334 		break;
8335 	case ARG_PTR_TO_PERCPU_BTF_ID:
8336 		if (!reg->btf_id) {
8337 			verbose(env, "Helper has invalid btf_id in R%d\n", regno);
8338 			return -EACCES;
8339 		}
8340 		meta->ret_btf = reg->btf;
8341 		meta->ret_btf_id = reg->btf_id;
8342 		break;
8343 	case ARG_PTR_TO_SPIN_LOCK:
8344 		if (in_rbtree_lock_required_cb(env)) {
8345 			verbose(env, "can't spin_{lock,unlock} in rbtree cb\n");
8346 			return -EACCES;
8347 		}
8348 		if (meta->func_id == BPF_FUNC_spin_lock) {
8349 			err = process_spin_lock(env, reg, argno_from_reg(regno), PROCESS_SPIN_LOCK);
8350 			if (err)
8351 				return err;
8352 		} else if (meta->func_id == BPF_FUNC_spin_unlock) {
8353 			err = process_spin_lock(env, reg, argno_from_reg(regno), 0);
8354 			if (err)
8355 				return err;
8356 		} else {
8357 			verifier_bug(env, "spin lock arg on unexpected helper");
8358 			return -EFAULT;
8359 		}
8360 		break;
8361 	case ARG_PTR_TO_TIMER:
8362 		err = process_timer_helper(env, reg, argno_from_reg(regno), meta);
8363 		if (err)
8364 			return err;
8365 		break;
8366 	case ARG_PTR_TO_FUNC:
8367 		meta->subprogno = reg->subprogno;
8368 		break;
8369 	case ARG_PTR_TO_MEM:
8370 		/* The access to this pointer is only checked when we hit the
8371 		 * next is_mem_size argument below.
8372 		 */
8373 		meta->raw_mode = arg_type & MEM_UNINIT;
8374 		if (arg_type & MEM_FIXED_SIZE) {
8375 			err = check_helper_mem_access(env, reg, argno_from_reg(regno), fn->arg_size[arg],
8376 						      arg_type & MEM_WRITE ? BPF_WRITE : BPF_READ,
8377 						      false, meta);
8378 			if (err)
8379 				return err;
8380 			if (arg_type & MEM_ALIGNED)
8381 				err = check_ptr_alignment(env, reg, 0, fn->arg_size[arg], true);
8382 		}
8383 		break;
8384 	case ARG_CONST_SIZE:
8385 		err = check_mem_size_reg(env, reg_state(env, regno - 1), reg, argno_from_reg(regno - 1),
8386 					 argno_from_reg(regno),
8387 					 fn->arg_type[arg - 1] & MEM_WRITE ?
8388 					 BPF_WRITE : BPF_READ,
8389 					 false, meta);
8390 		break;
8391 	case ARG_CONST_SIZE_OR_ZERO:
8392 		err = check_mem_size_reg(env, reg_state(env, regno - 1), reg, argno_from_reg(regno - 1),
8393 					 argno_from_reg(regno),
8394 					 fn->arg_type[arg - 1] & MEM_WRITE ?
8395 					 BPF_WRITE : BPF_READ,
8396 					 true, meta);
8397 		break;
8398 	case ARG_PTR_TO_DYNPTR:
8399 		err = process_dynptr_func(env, reg, argno_from_reg(regno), insn_idx, arg_type, &meta->ref_obj,
8400 					  &meta->dynptr);
8401 		if (err)
8402 			return err;
8403 		break;
8404 	case ARG_CONST_ALLOC_SIZE_OR_ZERO:
8405 		if (!tnum_is_const(reg->var_off)) {
8406 			verbose(env, "R%d is not a known constant'\n",
8407 				regno);
8408 			return -EACCES;
8409 		}
8410 		meta->mem_size = reg->var_off.value;
8411 		err = mark_chain_precision(env, regno);
8412 		if (err)
8413 			return err;
8414 		break;
8415 	case ARG_PTR_TO_CONST_STR:
8416 	{
8417 		err = check_arg_const_str(env, reg, argno_from_reg(regno));
8418 		if (err)
8419 			return err;
8420 		break;
8421 	}
8422 	case ARG_KPTR_XCHG_DEST:
8423 		err = process_kptr_func(env, regno, meta);
8424 		if (err)
8425 			return err;
8426 		break;
8427 	}
8428 
8429 	return err;
8430 }
8431 
8432 static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id)
8433 {
8434 	enum bpf_attach_type eatype = env->prog->expected_attach_type;
8435 	enum bpf_prog_type type = resolve_prog_type(env->prog);
8436 
8437 	if (func_id != BPF_FUNC_map_update_elem &&
8438 	    func_id != BPF_FUNC_map_delete_elem)
8439 		return false;
8440 
8441 	/* It's not possible to get access to a locked struct sock in these
8442 	 * contexts, so updating is safe.
8443 	 */
8444 	switch (type) {
8445 	case BPF_PROG_TYPE_TRACING:
8446 		if (eatype == BPF_TRACE_ITER)
8447 			return true;
8448 		break;
8449 	case BPF_PROG_TYPE_SOCK_OPS:
8450 		/* map_update allowed only via dedicated helpers with event type checks */
8451 		if (func_id == BPF_FUNC_map_delete_elem)
8452 			return true;
8453 		break;
8454 	case BPF_PROG_TYPE_SOCKET_FILTER:
8455 	case BPF_PROG_TYPE_SCHED_CLS:
8456 	case BPF_PROG_TYPE_SCHED_ACT:
8457 	case BPF_PROG_TYPE_XDP:
8458 	case BPF_PROG_TYPE_SK_REUSEPORT:
8459 	case BPF_PROG_TYPE_FLOW_DISSECTOR:
8460 	case BPF_PROG_TYPE_SK_LOOKUP:
8461 		return true;
8462 	default:
8463 		break;
8464 	}
8465 
8466 	verbose(env, "cannot update sockmap in this context\n");
8467 	return false;
8468 }
8469 
8470 bool bpf_allow_tail_call_in_subprogs(struct bpf_verifier_env *env)
8471 {
8472 	return env->prog->jit_requested &&
8473 	       bpf_jit_supports_subprog_tailcalls();
8474 }
8475 
/* Verify that helper @func_id may be called with @map.
 *
 * The check is done in two directions: the first switch is keyed on the map
 * type and lists the helpers that map type accepts; the second switch is
 * keyed on the helper and lists the map types that helper accepts. Map types
 * and helpers that are not listed fall through the respective default case
 * and are not restricted by that switch.
 *
 * Returns 0 when the combination is allowed (or when @map is NULL), and
 * -EINVAL otherwise. Every -EINVAL path writes a message to the verifier log.
 */
static int check_map_func_compatibility(struct bpf_verifier_env *env,
					struct bpf_map *map, int func_id)
{
	/* Nothing to check without a map. */
	if (!map)
		return 0;

	/* We need a two way check, first is from map perspective ... */
	switch (map->map_type) {
	case BPF_MAP_TYPE_PROG_ARRAY:
		if (func_id != BPF_FUNC_tail_call)
			goto error;
		break;
	case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
		if (func_id != BPF_FUNC_perf_event_read &&
		    func_id != BPF_FUNC_perf_event_output &&
		    func_id != BPF_FUNC_skb_output &&
		    func_id != BPF_FUNC_perf_event_read_value &&
		    func_id != BPF_FUNC_xdp_output)
			goto error;
		break;
	case BPF_MAP_TYPE_RINGBUF:
		if (func_id != BPF_FUNC_ringbuf_output &&
		    func_id != BPF_FUNC_ringbuf_reserve &&
		    func_id != BPF_FUNC_ringbuf_query &&
		    func_id != BPF_FUNC_ringbuf_reserve_dynptr &&
		    func_id != BPF_FUNC_ringbuf_submit_dynptr &&
		    func_id != BPF_FUNC_ringbuf_discard_dynptr)
			goto error;
		break;
	case BPF_MAP_TYPE_USER_RINGBUF:
		if (func_id != BPF_FUNC_user_ringbuf_drain)
			goto error;
		break;
	case BPF_MAP_TYPE_STACK_TRACE:
		if (func_id != BPF_FUNC_get_stackid)
			goto error;
		break;
	case BPF_MAP_TYPE_CGROUP_ARRAY:
		if (func_id != BPF_FUNC_skb_under_cgroup &&
		    func_id != BPF_FUNC_current_task_under_cgroup)
			goto error;
		break;
	case BPF_MAP_TYPE_CGROUP_STORAGE:
	case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE:
		if (func_id != BPF_FUNC_get_local_storage)
			goto error;
		break;
	case BPF_MAP_TYPE_DEVMAP:
	case BPF_MAP_TYPE_DEVMAP_HASH:
		if (func_id != BPF_FUNC_redirect_map &&
		    func_id != BPF_FUNC_map_lookup_elem)
			goto error;
		break;
	/* Restrict bpf side of cpumap and xskmap, open when use-cases
	 * appear.
	 */
	case BPF_MAP_TYPE_CPUMAP:
		if (func_id != BPF_FUNC_redirect_map)
			goto error;
		break;
	case BPF_MAP_TYPE_XSKMAP:
		if (func_id != BPF_FUNC_redirect_map &&
		    func_id != BPF_FUNC_map_lookup_elem)
			goto error;
		break;
	case BPF_MAP_TYPE_ARRAY_OF_MAPS:
	case BPF_MAP_TYPE_HASH_OF_MAPS:
		if (func_id != BPF_FUNC_map_lookup_elem)
			goto error;
		break;
	case BPF_MAP_TYPE_SOCKMAP:
		/* may_update_sockmap() is evaluated last and only when none of
		 * the dedicated helpers matched; it logs its own message when
		 * it refuses a generic update/delete.
		 */
		if (func_id != BPF_FUNC_sk_redirect_map &&
		    func_id != BPF_FUNC_sock_map_update &&
		    func_id != BPF_FUNC_msg_redirect_map &&
		    func_id != BPF_FUNC_sk_select_reuseport &&
		    func_id != BPF_FUNC_map_lookup_elem &&
		    !may_update_sockmap(env, func_id))
			goto error;
		break;
	case BPF_MAP_TYPE_SOCKHASH:
		/* Same structure as the sockmap case, with the hash helpers. */
		if (func_id != BPF_FUNC_sk_redirect_hash &&
		    func_id != BPF_FUNC_sock_hash_update &&
		    func_id != BPF_FUNC_msg_redirect_hash &&
		    func_id != BPF_FUNC_sk_select_reuseport &&
		    func_id != BPF_FUNC_map_lookup_elem &&
		    !may_update_sockmap(env, func_id))
			goto error;
		break;
	case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
		if (func_id != BPF_FUNC_sk_select_reuseport)
			goto error;
		break;
	case BPF_MAP_TYPE_QUEUE:
	case BPF_MAP_TYPE_STACK:
		if (func_id != BPF_FUNC_map_peek_elem &&
		    func_id != BPF_FUNC_map_pop_elem &&
		    func_id != BPF_FUNC_map_push_elem)
			goto error;
		break;
	case BPF_MAP_TYPE_SK_STORAGE:
		if (func_id != BPF_FUNC_sk_storage_get &&
		    func_id != BPF_FUNC_sk_storage_delete &&
		    func_id != BPF_FUNC_kptr_xchg)
			goto error;
		break;
	case BPF_MAP_TYPE_INODE_STORAGE:
		if (func_id != BPF_FUNC_inode_storage_get &&
		    func_id != BPF_FUNC_inode_storage_delete &&
		    func_id != BPF_FUNC_kptr_xchg)
			goto error;
		break;
	case BPF_MAP_TYPE_TASK_STORAGE:
		if (func_id != BPF_FUNC_task_storage_get &&
		    func_id != BPF_FUNC_task_storage_delete &&
		    func_id != BPF_FUNC_kptr_xchg)
			goto error;
		break;
	case BPF_MAP_TYPE_CGRP_STORAGE:
		if (func_id != BPF_FUNC_cgrp_storage_get &&
		    func_id != BPF_FUNC_cgrp_storage_delete &&
		    func_id != BPF_FUNC_kptr_xchg)
			goto error;
		break;
	case BPF_MAP_TYPE_BLOOM_FILTER:
		if (func_id != BPF_FUNC_map_peek_elem &&
		    func_id != BPF_FUNC_map_push_elem)
			goto error;
		break;
	case BPF_MAP_TYPE_INSN_ARRAY:
		/* No helper is accepted for this map type. */
		goto error;
	default:
		break;
	}

	/* ... and second from the function itself. */
	switch (func_id) {
	case BPF_FUNC_tail_call:
		if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
			goto error;
		/* This path returns directly with its own log message rather
		 * than going through the generic error label.
		 */
		if (env->subprog_cnt > 1 && !bpf_allow_tail_call_in_subprogs(env)) {
			verbose(env, "mixing of tail_calls and bpf-to-bpf calls is not supported\n");
			return -EINVAL;
		}
		break;
	case BPF_FUNC_perf_event_read:
	case BPF_FUNC_perf_event_output:
	case BPF_FUNC_perf_event_read_value:
	case BPF_FUNC_skb_output:
	case BPF_FUNC_xdp_output:
		if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
			goto error;
		break;
	case BPF_FUNC_ringbuf_output:
	case BPF_FUNC_ringbuf_reserve:
	case BPF_FUNC_ringbuf_query:
	case BPF_FUNC_ringbuf_reserve_dynptr:
	case BPF_FUNC_ringbuf_submit_dynptr:
	case BPF_FUNC_ringbuf_discard_dynptr:
		if (map->map_type != BPF_MAP_TYPE_RINGBUF)
			goto error;
		break;
	case BPF_FUNC_user_ringbuf_drain:
		if (map->map_type != BPF_MAP_TYPE_USER_RINGBUF)
			goto error;
		break;
	case BPF_FUNC_get_stackid:
		if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
			goto error;
		break;
	case BPF_FUNC_current_task_under_cgroup:
	case BPF_FUNC_skb_under_cgroup:
		if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
			goto error;
		break;
	case BPF_FUNC_redirect_map:
		if (map->map_type != BPF_MAP_TYPE_DEVMAP &&
		    map->map_type != BPF_MAP_TYPE_DEVMAP_HASH &&
		    map->map_type != BPF_MAP_TYPE_CPUMAP &&
		    map->map_type != BPF_MAP_TYPE_XSKMAP)
			goto error;
		break;
	case BPF_FUNC_sk_redirect_map:
	case BPF_FUNC_msg_redirect_map:
	case BPF_FUNC_sock_map_update:
		if (map->map_type != BPF_MAP_TYPE_SOCKMAP)
			goto error;
		break;
	case BPF_FUNC_sk_redirect_hash:
	case BPF_FUNC_msg_redirect_hash:
	case BPF_FUNC_sock_hash_update:
		if (map->map_type != BPF_MAP_TYPE_SOCKHASH)
			goto error;
		break;
	case BPF_FUNC_get_local_storage:
		if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
		    map->map_type != BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
			goto error;
		break;
	case BPF_FUNC_sk_select_reuseport:
		if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY &&
		    map->map_type != BPF_MAP_TYPE_SOCKMAP &&
		    map->map_type != BPF_MAP_TYPE_SOCKHASH)
			goto error;
		break;
	case BPF_FUNC_map_pop_elem:
		if (map->map_type != BPF_MAP_TYPE_QUEUE &&
		    map->map_type != BPF_MAP_TYPE_STACK)
			goto error;
		break;
	case BPF_FUNC_map_peek_elem:
	case BPF_FUNC_map_push_elem:
		if (map->map_type != BPF_MAP_TYPE_QUEUE &&
		    map->map_type != BPF_MAP_TYPE_STACK &&
		    map->map_type != BPF_MAP_TYPE_BLOOM_FILTER)
			goto error;
		break;
	case BPF_FUNC_map_lookup_percpu_elem:
		if (map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY &&
		    map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
		    map->map_type != BPF_MAP_TYPE_LRU_PERCPU_HASH)
			goto error;
		break;
	case BPF_FUNC_sk_storage_get:
	case BPF_FUNC_sk_storage_delete:
		if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
			goto error;
		break;
	case BPF_FUNC_inode_storage_get:
	case BPF_FUNC_inode_storage_delete:
		if (map->map_type != BPF_MAP_TYPE_INODE_STORAGE)
			goto error;
		break;
	case BPF_FUNC_task_storage_get:
	case BPF_FUNC_task_storage_delete:
		if (map->map_type != BPF_MAP_TYPE_TASK_STORAGE)
			goto error;
		break;
	case BPF_FUNC_cgrp_storage_get:
	case BPF_FUNC_cgrp_storage_delete:
		if (map->map_type != BPF_MAP_TYPE_CGRP_STORAGE)
			goto error;
		break;
	default:
		break;
	}

	return 0;
error:
	/* Common rejection path: report the offending map type / helper pair. */
	verbose(env, "cannot pass map_type %d into func %s#%d\n",
		map->map_type, func_id_name(func_id), func_id);
	return -EINVAL;
}
8728 
8729 static bool check_raw_mode_ok(const struct bpf_func_proto *fn)
8730 {
8731 	int count = 0;
8732 
8733 	if (arg_type_is_raw_mem(fn->arg1_type))
8734 		count++;
8735 	if (arg_type_is_raw_mem(fn->arg2_type))
8736 		count++;
8737 	if (arg_type_is_raw_mem(fn->arg3_type))
8738 		count++;
8739 	if (arg_type_is_raw_mem(fn->arg4_type))
8740 		count++;
8741 	if (arg_type_is_raw_mem(fn->arg5_type))
8742 		count++;
8743 
8744 	/* We only support one arg being in raw mode at the moment,
8745 	 * which is sufficient for the helper functions we have
8746 	 * right now.
8747 	 */
8748 	return count <= 1;
8749 }
8750 
8751 static bool check_args_pair_invalid(const struct bpf_func_proto *fn, int arg)
8752 {
8753 	bool is_fixed = fn->arg_type[arg] & MEM_FIXED_SIZE;
8754 	bool has_size = fn->arg_size[arg] != 0;
8755 	bool is_next_size = false;
8756 
8757 	if (arg + 1 < ARRAY_SIZE(fn->arg_type))
8758 		is_next_size = arg_type_is_mem_size(fn->arg_type[arg + 1]);
8759 
8760 	if (base_type(fn->arg_type[arg]) != ARG_PTR_TO_MEM)
8761 		return is_next_size;
8762 
8763 	return has_size == is_next_size || is_next_size == is_fixed;
8764 }
8765 
8766 static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
8767 {
8768 	/* bpf_xxx(..., buf, len) call will access 'len'
8769 	 * bytes from memory 'buf'. Both arg types need
8770 	 * to be paired, so make sure there's no buggy
8771 	 * helper function specification.
8772 	 */
8773 	if (arg_type_is_mem_size(fn->arg1_type) ||
8774 	    check_args_pair_invalid(fn, 0) ||
8775 	    check_args_pair_invalid(fn, 1) ||
8776 	    check_args_pair_invalid(fn, 2) ||
8777 	    check_args_pair_invalid(fn, 3) ||
8778 	    check_args_pair_invalid(fn, 4))
8779 		return false;
8780 
8781 	return true;
8782 }
8783 
8784 static bool check_btf_id_ok(const struct bpf_func_proto *fn)
8785 {
8786 	int i;
8787 
8788 	for (i = 0; i < ARRAY_SIZE(fn->arg_type); i++) {
8789 		if (base_type(fn->arg_type[i]) == ARG_PTR_TO_BTF_ID)
8790 			return !!fn->arg_btf_id[i];
8791 		if (base_type(fn->arg_type[i]) == ARG_PTR_TO_SPIN_LOCK)
8792 			return fn->arg_btf_id[i] == BPF_PTR_POISON;
8793 		if (base_type(fn->arg_type[i]) != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i] &&
8794 		    /* arg_btf_id and arg_size are in a union. */
8795 		    (base_type(fn->arg_type[i]) != ARG_PTR_TO_MEM ||
8796 		     !(fn->arg_type[i] & MEM_FIXED_SIZE)))
8797 			return false;
8798 	}
8799 
8800 	return true;
8801 }
8802 
8803 static bool check_mem_arg_rw_flag_ok(const struct bpf_func_proto *fn)
8804 {
8805 	int i;
8806 
8807 	for (i = 0; i < ARRAY_SIZE(fn->arg_type); i++) {
8808 		enum bpf_arg_type arg_type = fn->arg_type[i];
8809 
8810 		if (base_type(arg_type) != ARG_PTR_TO_MEM)
8811 			continue;
8812 		if (!(arg_type & (MEM_WRITE | MEM_RDONLY)))
8813 			return false;
8814 	}
8815 
8816 	return true;
8817 }
8818 
8819 static bool check_proto_release_reg(const struct bpf_func_proto *fn, struct bpf_call_arg_meta *meta)
8820 {
8821 	int i;
8822 
8823 	for (i = 0; i < ARRAY_SIZE(fn->arg_type); i++) {
8824 		enum bpf_arg_type arg_type = fn->arg_type[i];
8825 
8826 		if (arg_type_is_release(arg_type)) {
8827 			if (meta->release_regno)
8828 				return false;
8829 			meta->release_regno = i + 1;
8830 		}
8831 	}
8832 
8833 	return true;
8834 }
8835 
8836 static int check_func_proto(const struct bpf_func_proto *fn, struct bpf_call_arg_meta *meta)
8837 {
8838 	return check_raw_mode_ok(fn) &&
8839 	       check_arg_pair_ok(fn) &&
8840 	       check_mem_arg_rw_flag_ok(fn) &&
8841 	       check_proto_release_reg(fn, meta) &&
8842 	       check_btf_id_ok(fn) ? 0 : -EINVAL;
8843 }
8844 
8845 /* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
8846  * are now invalid, so turn them into unknown SCALAR_VALUE.
8847  *
8848  * This also applies to dynptr slices belonging to skb and xdp dynptrs,
8849  * since these slices point to packet data.
8850  */
8851 static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
8852 {
8853 	struct bpf_func_state *state;
8854 	struct bpf_reg_state *reg;
8855 
8856 	bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
8857 		if (reg_is_pkt_pointer_any(reg) || reg_is_dynptr_slice_pkt(reg))
8858 			mark_reg_invalid(env, reg);
8859 	}));
8860 }
8861 
/* Sentinel values that mark_pkt_end() stores in reg->range. */
enum {
	/* stored when range_open is false: the "pkt >= pkt_end" case */
	AT_PKT_END = -1,
	/* stored when range_open is true: the "pkt > pkt_end" case */
	BEYOND_PKT_END = -2,
};
8866 
8867 static void mark_pkt_end(struct bpf_verifier_state *vstate, int regn, bool range_open)
8868 {
8869 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
8870 	struct bpf_reg_state *reg = &state->regs[regn];
8871 
8872 	if (reg->type != PTR_TO_PACKET)
8873 		/* PTR_TO_PACKET_META is not supported yet */
8874 		return;
8875 
8876 	/* The 'reg' is pkt > pkt_end or pkt >= pkt_end.
8877 	 * How far beyond pkt_end it goes is unknown.
8878 	 * if (!range_open) it's the case of pkt >= pkt_end
8879 	 * if (range_open) it's the case of pkt > pkt_end
8880 	 * hence this pointer is at least 1 byte bigger than pkt_end
8881 	 */
8882 	if (range_open)
8883 		reg->range = BEYOND_PKT_END;
8884 	else
8885 		reg->range = AT_PKT_END;
8886 }
8887 
8888 static int release_reference_nomark(struct bpf_verifier_state *state, int id)
8889 {
8890 	int i;
8891 
8892 	for (i = 0; i < state->acquired_refs; i++) {
8893 		if (state->refs[i].type != REF_TYPE_PTR)
8894 			continue;
8895 		if (state->refs[i].id == id) {
8896 			release_reference_state(state, i);
8897 			return 0;
8898 		}
8899 	}
8900 	return -EINVAL;
8901 }
8902 
8903 static int idstack_push(struct bpf_idmap *idmap, u32 id)
8904 {
8905 	int i;
8906 
8907 	if (!id)
8908 		return 0;
8909 
8910 	for (i = 0; i < idmap->cnt; i++)
8911 		if (idmap->map[i].old == id)
8912 			return 0;
8913 
8914 	if (WARN_ON_ONCE(idmap->cnt >= BPF_ID_MAP_SIZE))
8915 		return -EFAULT;
8916 
8917 	idmap->map[idmap->cnt++].old = id;
8918 	return 0;
8919 }
8920 
8921 static int idstack_pop(struct bpf_idmap *idmap)
8922 {
8923 	if (!idmap->cnt)
8924 		return 0;
8925 
8926 	return idmap->map[--idmap->cnt].old;
8927 }
8928 
/* Release id and objects derived from it iteratively in a DFS manner.
 *
 * env->idmap_scratch is reused as an explicit stack of ids still to be
 * processed. For each popped id, every register or selected stack slot whose
 * id or parent_id matches is invalidated, and the ids of matching children
 * are pushed for a later iteration.
 *
 * Returns 0 on success, -EINVAL if a still-acquired reference has the id
 * being processed as its parent_id (reported as a leak), or the error from
 * idstack_push().
 */
static int release_reference(struct bpf_verifier_env *env, int id)
{
	/* Stack slot kinds passed to the iteration macro below. */
	u32 mask = (1 << STACK_SPILL) | (1 << STACK_DYNPTR);
	struct bpf_verifier_state *vstate = env->cur_state;
	struct bpf_idmap *idstack = &env->idmap_scratch;
	struct bpf_stack_state *stack;
	struct bpf_func_state *state;
	struct bpf_reg_state *reg;
	int i, err;

	/* Start with an empty work stack seeded with the requested id. */
	idstack->cnt = 0;
	err = idstack_push(idstack, id);
	if (err)
		return err;

	/* Drop the acquired-reference entry itself, if there is one for id. */
	if (find_reference_state(vstate, id))
		WARN_ON_ONCE(release_reference_nomark(vstate, id));

	/* idstack_pop() returns 0 once the stack is empty, ending the loop. */
	while ((id = idstack_pop(idstack))) {
		/*
		 * Child references are inaccessible after parent is released,
		 * any child references that exist at this point are a leak.
		 */
		for (i = 0; i < vstate->acquired_refs; i++) {
			if (vstate->refs[i].type != REF_TYPE_PTR)
				continue;
			if (vstate->refs[i].parent_id != id)
				continue;
			verbose(env, "Leaking reference id=%d alloc_insn=%d. Release it first.\n",
				vstate->refs[i].id, vstate->refs[i].insn_idx);
			return -EINVAL;
		}

		bpf_for_each_reg_in_vstate_mask(vstate, state, reg, stack, mask, ({
			/* Only the object itself and its direct children are of interest. */
			if (reg->id != id && reg->parent_id != id)
				continue;

			/* Free objects derived from the current object */
			if (reg->parent_id == id) {
				err = idstack_push(idstack, reg->id);
				if (err)
					return err;
			}

			/* 'stack' is NULL or a spill slot: invalidate the register
			 * state; a dynptr slot is invalidated as a whole.
			 */
			if (!stack || stack->slot_type[BPF_REG_SIZE - 1] == STACK_SPILL)
				mark_reg_invalid(env, reg);
			else if (stack->slot_type[BPF_REG_SIZE - 1] == STACK_DYNPTR)
				invalidate_dynptr(env, stack);
		}));
	}

	return 0;
}
8983 
8984 static void invalidate_non_owning_refs(struct bpf_verifier_env *env)
8985 {
8986 	struct bpf_func_state *unused;
8987 	struct bpf_reg_state *reg;
8988 
8989 	bpf_for_each_reg_in_vstate(env->cur_state, unused, reg, ({
8990 		if (type_is_non_owning_ref(reg->type))
8991 			mark_reg_invalid(env, reg);
8992 	}));
8993 }
8994 
8995 static void invalidate_rcu_protected_refs(struct bpf_verifier_env *env)
8996 {
8997 	struct bpf_stack_state *stack;
8998 	struct bpf_func_state *state;
8999 	struct bpf_reg_state *reg;
9000 	u32 clear_mask = (1 << STACK_SPILL) | (1 << STACK_ITER);
9001 
9002 	bpf_for_each_reg_in_vstate_mask(env->cur_state, state, reg, stack, clear_mask, ({
9003 		if (reg->type & MEM_RCU) {
9004 			reg->type &= ~(MEM_RCU | PTR_MAYBE_NULL);
9005 			reg->type |= PTR_UNTRUSTED;
9006 		}
9007 	}));
9008 }
9009 
9010 static int ref_convert_alloc_rcu_protected(struct bpf_verifier_env *env, u32 id)
9011 {
9012 	struct bpf_func_state *state;
9013 	struct bpf_reg_state *reg;
9014 	int err;
9015 
9016 	err = release_reference_nomark(env->cur_state, id);
9017 
9018 	bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
9019 		if (reg->id != id)
9020 			continue;
9021 		if ((reg->type & MEM_ALLOC) && (reg->type & MEM_PERCPU)) {
9022 			reg->id = 0;
9023 			reg->type &= ~MEM_ALLOC;
9024 			reg->type |= MEM_RCU;
9025 		}
9026 	}));
9027 
9028 	return err;
9029 }
9030 
9031 static void clear_caller_saved_regs(struct bpf_verifier_env *env,
9032 				    struct bpf_reg_state *regs)
9033 {
9034 	int i;
9035 
9036 	/* after the call registers r0 - r5 were scratched */
9037 	for (i = 0; i < CALLER_SAVED_REGS; i++) {
9038 		bpf_mark_reg_not_init(env, &regs[caller_saved[i]]);
9039 		__check_reg_arg(env, regs, caller_saved[i], DST_OP_NO_MARK);
9040 	}
9041 }
9042 
9043 static void invalidate_outgoing_stack_args(const struct bpf_verifier_env *env,
9044 					   struct bpf_func_state *state)
9045 {
9046 	int i, nslots = state->out_stack_arg_cnt;
9047 
9048 	for (i = 0; i < nslots; i++)
9049 		bpf_mark_reg_not_init(env, &state->stack_arg_regs[i]);
9050 }
9051 
/* Callback that prepares a freshly initialised @callee frame from the
 * @caller frame for the call at @insn_idx. A non-zero return value is
 * treated as an error by the code that invokes it.
 */
typedef int (*set_callee_state_fn)(struct bpf_verifier_env *env,
				   struct bpf_func_state *caller,
				   struct bpf_func_state *callee,
				   int insn_idx);
9056 
9057 static int set_callee_state(struct bpf_verifier_env *env,
9058 			    struct bpf_func_state *caller,
9059 			    struct bpf_func_state *callee, int insn_idx);
9060 
9061 static int setup_func_entry(struct bpf_verifier_env *env, int subprog, int callsite,
9062 			    set_callee_state_fn set_callee_state_cb,
9063 			    struct bpf_verifier_state *state)
9064 {
9065 	struct bpf_func_state *caller, *callee;
9066 	int err;
9067 
9068 	if (state->curframe + 1 >= MAX_CALL_FRAMES) {
9069 		verbose(env, "the call stack of %d frames is too deep\n",
9070 			state->curframe + 2);
9071 		return -E2BIG;
9072 	}
9073 
9074 	if (state->frame[state->curframe + 1]) {
9075 		verifier_bug(env, "Frame %d already allocated", state->curframe + 1);
9076 		return -EFAULT;
9077 	}
9078 
9079 	caller = state->frame[state->curframe];
9080 	callee = kzalloc_obj(*callee, GFP_KERNEL_ACCOUNT);
9081 	if (!callee)
9082 		return -ENOMEM;
9083 	state->frame[state->curframe + 1] = callee;
9084 
9085 	/* callee cannot access r0, r6 - r9 for reading and has to write
9086 	 * into its own stack before reading from it.
9087 	 * callee can read/write into caller's stack
9088 	 */
9089 	init_func_state(env, callee,
9090 			/* remember the callsite, it will be used by bpf_exit */
9091 			callsite,
9092 			state->curframe + 1 /* frameno within this callchain */,
9093 			subprog /* subprog number within this prog */);
9094 	err = set_callee_state_cb(env, caller, callee, callsite);
9095 	if (err)
9096 		goto err_out;
9097 
9098 	/* only increment it after check_reg_arg() finished */
9099 	state->curframe++;
9100 
9101 	return 0;
9102 
9103 err_out:
9104 	free_func_state(callee);
9105 	state->frame[state->curframe + 1] = NULL;
9106 	return err;
9107 }
9108 
/* Check the caller's argument registers against the per-argument
 * descriptions recorded for @subprog (sub->args[]).
 *
 * btf_prepare_func_args() runs first. If it fails, its error is returned,
 * except that when the subprog has incoming stack arguments the outgoing
 * stack arguments are still checked and an error from that check takes
 * precedence.
 *
 * Returns 0 when all sub->arg_cnt arguments are accepted, -EINVAL on a
 * mismatch, -EFAULT for an argument type this function does not recognize,
 * or an error propagated from one of the helpers it calls.
 *
 * NOTE(review): @btf is not referenced in this body.
 */
static int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog,
				    const struct btf *btf,
				    struct bpf_reg_state *regs)
{
	struct bpf_subprog_info *sub = subprog_info(env, subprog);
	struct bpf_func_state *caller = cur_func(env);
	struct bpf_verifier_log *log = &env->log;
	struct ref_obj_desc ref_obj = {};
	u32 i;
	int ret, err;

	ret = btf_prepare_func_args(env, subprog);
	if (ret) {
		if (bpf_in_stack_arg_cnt(sub) > 0) {
			err = check_outgoing_stack_args(env, caller, sub->arg_cnt);
			if (err)
				return err;
		}
		return ret;
	}

	ret = check_outgoing_stack_args(env, caller, sub->arg_cnt);
	if (ret)
		return ret;

	/* check that BTF function arguments match actual types that the
	 * verifier sees.
	 */
	for (i = 0; i < sub->arg_cnt; i++) {
		/* argno is 1-based, the index into sub->args[] is 0-based */
		argno_t argno = argno_from_arg(i + 1);
		struct bpf_reg_state *reg = get_func_arg_reg(caller, regs, i);
		struct bpf_subprog_arg_info *arg = &sub->args[i];

		/* The order of the branches below matters: the exact-match and
		 * flag tests come before the base_type() tests.
		 */
		if (arg->arg_type == ARG_ANYTHING) {
			if (reg->type != SCALAR_VALUE) {
				bpf_log(log, "%s is not a scalar\n", reg_arg_name(env, argno));
				return -EINVAL;
			}
		} else if (arg->arg_type & PTR_UNTRUSTED) {
			/*
			 * Anything is allowed for untrusted arguments, as these are
			 * read-only and probe read instructions would protect against
			 * invalid memory access.
			 */
		} else if (arg->arg_type == ARG_PTR_TO_CTX) {
			ret = check_func_arg_reg_off(env, reg, argno, ARG_PTR_TO_CTX);
			if (ret < 0)
				return ret;
			/* If function expects ctx type in BTF check that caller
			 * is passing PTR_TO_CTX.
			 */
			if (reg->type != PTR_TO_CTX) {
				bpf_log(log, "%s expects pointer to ctx\n",
					reg_arg_name(env, argno));
				return -EINVAL;
			}
		} else if (base_type(arg->arg_type) == ARG_PTR_TO_MEM) {
			ret = check_func_arg_reg_off(env, reg, argno, ARG_DONTCARE);
			if (ret < 0)
				return ret;
			/* Any failure of the memory check is reported as -EINVAL. */
			if (check_mem_reg(env, reg, argno, arg->mem_size))
				return -EINVAL;
			/* A possibly-NULL register is only acceptable when the
			 * argument itself is declared nullable.
			 */
			if (!(arg->arg_type & PTR_MAYBE_NULL) && (reg->type & PTR_MAYBE_NULL)) {
				bpf_log(log, "%s is expected to be non-NULL\n",
					reg_arg_name(env, argno));
				return -EINVAL;
			}
		} else if (base_type(arg->arg_type) == ARG_PTR_TO_ARENA) {
			/*
			 * Can pass any value and the kernel won't crash, but
			 * only PTR_TO_ARENA or SCALAR make sense. Everything
			 * else is a bug in the bpf program. Point it out to
			 * the user at the verification time instead of
			 * run-time debug nightmare.
			 */
			if (reg->type != PTR_TO_ARENA && reg->type != SCALAR_VALUE) {
				bpf_log(log, "%s is not a pointer to arena or scalar.\n",
					reg_arg_name(env, argno));
				return -EINVAL;
			}
		} else if (arg->arg_type == ARG_PTR_TO_DYNPTR) {
			ret = check_func_arg_reg_off(env, reg, argno, ARG_PTR_TO_DYNPTR);
			if (ret)
				return ret;

			ret = process_dynptr_func(env, reg, argno, -1, arg->arg_type, &ref_obj, NULL);
			if (ret)
				return ret;
		} else if (base_type(arg->arg_type) == ARG_PTR_TO_BTF_ID) {
			struct bpf_call_arg_meta meta;
			/* NOTE(review): shadows the function-scope 'err' declared above. */
			int err;

			/* A NULL register satisfies a nullable BTF-ID argument. */
			if (bpf_register_is_null(reg) && type_may_be_null(arg->arg_type))
				continue;

			memset(&meta, 0, sizeof(meta)); /* leave func_id as zero */
			err = check_reg_type(env, reg, argno, arg->arg_type, &arg->btf_id, &meta);
			/* The offset check only runs if the type check passed. */
			err = err ?: check_func_arg_reg_off(env, reg, argno, arg->arg_type);
			if (err)
				return err;
		} else {
			verifier_bug(env, "unrecognized %s type %d",
				     reg_arg_name(env, argno), arg->arg_type);
			return -EFAULT;
		}
	}

	return 0;
}
9218 
9219 /* Compare BTF of a function call with given bpf_reg_state.
9220  * Returns:
9221  * EFAULT - there is a verifier bug. Abort verification.
9222  * EINVAL - there is a type mismatch or BTF is not available.
9223  * 0 - BTF matches with what bpf_reg_state expects.
9224  * Only PTR_TO_CTX and SCALAR_VALUE states are recognized.
9225  */
9226 static int btf_check_subprog_call(struct bpf_verifier_env *env, int subprog,
9227 				  struct bpf_reg_state *regs)
9228 {
9229 	struct bpf_prog *prog = env->prog;
9230 	struct btf *btf = prog->aux->btf;
9231 	u32 btf_id;
9232 	int err;
9233 
9234 	if (!prog->aux->func_info)
9235 		return -EINVAL;
9236 
9237 	btf_id = prog->aux->func_info[subprog].type_id;
9238 	if (!btf_id)
9239 		return -EFAULT;
9240 
9241 	if (prog->aux->func_info_aux[subprog].unreliable)
9242 		return -EINVAL;
9243 
9244 	err = btf_check_func_arg_match(env, subprog, btf, regs);
9245 	/* Compiler optimizations can remove arguments from static functions
9246 	 * or mismatched type can be passed into a global function.
9247 	 * In such cases mark the function as unreliable from BTF point of view.
9248 	 */
9249 	if (err)
9250 		prog->aux->func_info_aux[subprog].unreliable = true;
9251 	return err;
9252 }
9253 
/* Queue verification of @subprog as a callback invoked by the helper or
 * kfunc call @insn at @insn_idx.
 *
 * For async callbacks a separate state is pushed with push_async_cb() and
 * its frame 0 is populated via @set_callee_state_cb. For all other callbacks
 * a copy of the current state is pushed with push_stack() and a callee frame
 * is set up in that copy; the current state keeps running at the next
 * instruction.
 *
 * Returns 0 on success, -EFAULT on a verifier bug (including -EFAULT from
 * btf_check_subprog_call()), or an error from the push/setup helpers.
 */
static int push_callback_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
			      int insn_idx, int subprog,
			      set_callee_state_fn set_callee_state_cb)
{
	struct bpf_verifier_state *state = env->cur_state, *callback_state;
	struct bpf_func_state *caller, *callee;
	int err;

	caller = state->frame[state->curframe];
	/* Only -EFAULT is fatal here; other errors from the BTF check are ignored. */
	err = btf_check_subprog_call(env, subprog, caller->regs);
	if (err == -EFAULT)
		return err;

	/* set_callee_state is used for direct subprog calls, but we are
	 * interested in validating only BPF helpers that can call subprogs as
	 * callbacks
	 */
	env->subprog_info[subprog].is_cb = true;
	if (bpf_pseudo_kfunc_call(insn) &&
	    !is_callback_calling_kfunc(insn->imm)) {
		verifier_bug(env, "kfunc %s#%d not marked as callback-calling",
			     func_id_name(insn->imm), insn->imm);
		return -EFAULT;
	} else if (!bpf_pseudo_kfunc_call(insn) &&
		   !is_callback_calling_function(insn->imm)) { /* helper */
		verifier_bug(env, "helper %s#%d not marked as callback-calling",
			     func_id_name(insn->imm), insn->imm);
		return -EFAULT;
	}

	if (bpf_is_async_callback_calling_insn(insn)) {
		struct bpf_verifier_state *async_cb;

		/* there is no real recursion here. timer and workqueue callbacks are async */
		env->subprog_info[subprog].is_async_cb = true;
		async_cb = push_async_cb(env, env->subprog_info[subprog].start,
					 insn_idx, subprog,
					 is_async_cb_sleepable(env, insn));
		if (IS_ERR(async_cb))
			return PTR_ERR(async_cb);
		/* The async state's callee is its frame 0. */
		callee = async_cb->frame[0];
		callee->async_entry_cnt = caller->async_entry_cnt + 1;

		/* Convert bpf_timer_set_callback() args into timer callback args */
		err = set_callee_state_cb(env, caller, callee, insn_idx);
		if (err)
			return err;

		return 0;
	}

	/* for callback functions enqueue entry to callback and
	 * proceed with next instruction within current frame.
	 */
	callback_state = push_stack(env, env->subprog_info[subprog].start, insn_idx, false);
	if (IS_ERR(callback_state))
		return PTR_ERR(callback_state);

	err = setup_func_entry(env, subprog, insn_idx, set_callee_state_cb,
			       callback_state);
	if (err)
		return err;

	callback_state->callback_unroll_depth++;
	/* setup_func_entry() advanced curframe in callback_state, so
	 * curframe - 1 is the calling frame inside the pushed state.
	 */
	callback_state->frame[callback_state->curframe - 1]->callback_depth++;
	/* The calling frame of the current state has its depth reset. */
	caller->callback_depth = 0;
	return 0;
}
9322 
9323 static int process_bpf_exit_full(struct bpf_verifier_env *env,
9324 				 bool *do_print_state, bool exception_exit);
9325 
9326 static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
9327 			   int *insn_idx)
9328 {
9329 	struct bpf_verifier_state *state = env->cur_state;
9330 	struct bpf_subprog_info *caller_info;
9331 	u16 callee_incoming, stack_arg_cnt;
9332 	struct bpf_func_state *caller;
9333 	int err, subprog, target_insn;
9334 
9335 	target_insn = *insn_idx + insn->imm + 1;
9336 	subprog = bpf_find_subprog(env, target_insn);
9337 	if (verifier_bug_if(subprog < 0, env, "target of func call at insn %d is not a program",
9338 			    target_insn))
9339 		return -EFAULT;
9340 
9341 	caller = state->frame[state->curframe];
9342 	err = btf_check_subprog_call(env, subprog, caller->regs);
9343 	if (err == -EFAULT)
9344 		return err;
9345 	if (bpf_subprog_is_global(env, subprog)) {
9346 		const char *sub_name = subprog_name(env, subprog);
9347 
9348 		if (env->cur_state->active_locks) {
9349 			verbose(env, "global function calls are not allowed while holding a lock,\n"
9350 				     "use static function instead\n");
9351 			return -EINVAL;
9352 		}
9353 
9354 		if (env->subprog_info[subprog].might_sleep && !in_sleepable_context(env)) {
9355 			verbose(env, "sleepable global function %s() called in %s\n",
9356 				sub_name, non_sleepable_context_description(env));
9357 			return -EINVAL;
9358 		}
9359 
9360 		if (err) {
9361 			verbose(env, "Caller passes invalid args into func#%d ('%s')\n",
9362 				subprog, sub_name);
9363 			return err;
9364 		}
9365 
9366 		if (env->log.level & BPF_LOG_LEVEL)
9367 			verbose(env, "Func#%d ('%s') is global and assumed valid.\n",
9368 				subprog, sub_name);
9369 		if (env->subprog_info[subprog].changes_pkt_data)
9370 			clear_all_pkt_pointers(env);
9371 		/* mark global subprog for verifying after main prog */
9372 		subprog_aux(env, subprog)->called = true;
9373 		clear_caller_saved_regs(env, caller->regs);
9374 		invalidate_outgoing_stack_args(env, cur_func(env));
9375 
9376 		/* All non-void global functions return a 64-bit SCALAR_VALUE. */
9377 		if (!subprog_returns_void(env, subprog)) {
9378 			mark_reg_unknown(env, caller->regs, BPF_REG_0);
9379 			caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
9380 		}
9381 
9382 		if (env->subprog_info[subprog].might_throw) {
9383 			struct bpf_verifier_state *branch;
9384 
9385 			branch = push_stack(env, *insn_idx + 1, *insn_idx, false);
9386 			if (IS_ERR(branch)) {
9387 				verbose(env, "failed to push state for global subprog exception path\n");
9388 				return PTR_ERR(branch);
9389 			}
9390 			return process_bpf_exit_full(env, NULL, true);
9391 		}
9392 
9393 		/* continue with next insn after call */
9394 		return 0;
9395 	}
9396 
9397 	/*
9398 	 * Track caller's total stack arg count (incoming + max outgoing).
9399 	 * This is needed so the JIT knows how much stack arg space to allocate.
9400 	 */
9401 	caller_info = &env->subprog_info[caller->subprogno];
9402 	callee_incoming = bpf_in_stack_arg_cnt(&env->subprog_info[subprog]);
9403 	stack_arg_cnt = bpf_in_stack_arg_cnt(caller_info) + callee_incoming;
9404 	if (stack_arg_cnt > caller_info->stack_arg_cnt)
9405 		caller_info->stack_arg_cnt = stack_arg_cnt;
9406 
9407 	/* for regular function entry setup new frame and continue
9408 	 * from that frame.
9409 	 */
9410 	err = setup_func_entry(env, subprog, *insn_idx, set_callee_state, state);
9411 	if (err)
9412 		return err;
9413 
9414 	clear_caller_saved_regs(env, caller->regs);
9415 
9416 	/* and go analyze first insn of the callee */
9417 	*insn_idx = env->subprog_info[subprog].start - 1;
9418 
9419 	if (env->log.level & BPF_LOG_LEVEL) {
9420 		verbose(env, "caller:\n");
9421 		print_verifier_state(env, state, caller->frameno, true);
9422 		verbose(env, "callee:\n");
9423 		print_verifier_state(env, state, state->curframe, true);
9424 	}
9425 
9426 	return 0;
9427 }
9428 
9429 int map_set_for_each_callback_args(struct bpf_verifier_env *env,
9430 				   struct bpf_func_state *caller,
9431 				   struct bpf_func_state *callee)
9432 {
9433 	/* bpf_for_each_map_elem(struct bpf_map *map, void *callback_fn,
9434 	 *      void *callback_ctx, u64 flags);
9435 	 * callback_fn(struct bpf_map *map, void *key, void *value,
9436 	 *      void *callback_ctx);
9437 	 */
9438 	callee->regs[BPF_REG_1] = caller->regs[BPF_REG_1];
9439 
9440 	callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
9441 	__mark_reg_known_zero(&callee->regs[BPF_REG_2]);
9442 	callee->regs[BPF_REG_2].map_ptr = caller->regs[BPF_REG_1].map_ptr;
9443 
9444 	callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
9445 	__mark_reg_known_zero(&callee->regs[BPF_REG_3]);
9446 	callee->regs[BPF_REG_3].map_ptr = caller->regs[BPF_REG_1].map_ptr;
9447 
9448 	/* pointer to stack or null */
9449 	callee->regs[BPF_REG_4] = caller->regs[BPF_REG_3];
9450 
9451 	/* unused */
9452 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
9453 	return 0;
9454 }
9455 
9456 static int set_callee_state(struct bpf_verifier_env *env,
9457 			    struct bpf_func_state *caller,
9458 			    struct bpf_func_state *callee, int insn_idx)
9459 {
9460 	int i;
9461 
9462 	/* copy r1 - r5 args that callee can access.  The copy includes parent
9463 	 * pointers, which connects us up to the liveness chain
9464 	 */
9465 	for (i = BPF_REG_1; i <= BPF_REG_5; i++)
9466 		callee->regs[i] = caller->regs[i];
9467 	return 0;
9468 }
9469 
9470 static int set_map_elem_callback_state(struct bpf_verifier_env *env,
9471 				       struct bpf_func_state *caller,
9472 				       struct bpf_func_state *callee,
9473 				       int insn_idx)
9474 {
9475 	struct bpf_insn_aux_data *insn_aux = &env->insn_aux_data[insn_idx];
9476 	struct bpf_map *map;
9477 	int err;
9478 
9479 	/* valid map_ptr and poison value does not matter */
9480 	map = insn_aux->map_ptr_state.map_ptr;
9481 	if (!map->ops->map_set_for_each_callback_args ||
9482 	    !map->ops->map_for_each_callback) {
9483 		verbose(env, "callback function not allowed for map\n");
9484 		return -ENOTSUPP;
9485 	}
9486 
9487 	err = map->ops->map_set_for_each_callback_args(env, caller, callee);
9488 	if (err)
9489 		return err;
9490 
9491 	callee->in_callback_fn = true;
9492 	callee->callback_ret_range = retval_range(0, 1);
9493 	return 0;
9494 }
9495 
9496 static int set_loop_callback_state(struct bpf_verifier_env *env,
9497 				   struct bpf_func_state *caller,
9498 				   struct bpf_func_state *callee,
9499 				   int insn_idx)
9500 {
9501 	/* bpf_loop(u32 nr_loops, void *callback_fn, void *callback_ctx,
9502 	 *	    u64 flags);
9503 	 * callback_fn(u64 index, void *callback_ctx);
9504 	 */
9505 	callee->regs[BPF_REG_1].type = SCALAR_VALUE;
9506 	callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3];
9507 
9508 	/* unused */
9509 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
9510 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
9511 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
9512 
9513 	callee->in_callback_fn = true;
9514 	callee->callback_ret_range = retval_range(0, 1);
9515 	return 0;
9516 }
9517 
9518 static int set_timer_callback_state(struct bpf_verifier_env *env,
9519 				    struct bpf_func_state *caller,
9520 				    struct bpf_func_state *callee,
9521 				    int insn_idx)
9522 {
9523 	struct bpf_map *map_ptr = caller->regs[BPF_REG_1].map_ptr;
9524 
9525 	/* bpf_timer_set_callback(struct bpf_timer *timer, void *callback_fn);
9526 	 * callback_fn(struct bpf_map *map, void *key, void *value);
9527 	 */
9528 	callee->regs[BPF_REG_1].type = CONST_PTR_TO_MAP;
9529 	__mark_reg_known_zero(&callee->regs[BPF_REG_1]);
9530 	callee->regs[BPF_REG_1].map_ptr = map_ptr;
9531 
9532 	callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
9533 	__mark_reg_known_zero(&callee->regs[BPF_REG_2]);
9534 	callee->regs[BPF_REG_2].map_ptr = map_ptr;
9535 
9536 	callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
9537 	__mark_reg_known_zero(&callee->regs[BPF_REG_3]);
9538 	callee->regs[BPF_REG_3].map_ptr = map_ptr;
9539 
9540 	/* unused */
9541 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
9542 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
9543 	callee->in_async_callback_fn = true;
9544 	callee->callback_ret_range = retval_range(0, 0);
9545 	return 0;
9546 }
9547 
9548 static int set_find_vma_callback_state(struct bpf_verifier_env *env,
9549 				       struct bpf_func_state *caller,
9550 				       struct bpf_func_state *callee,
9551 				       int insn_idx)
9552 {
9553 	/* bpf_find_vma(struct task_struct *task, u64 addr,
9554 	 *               void *callback_fn, void *callback_ctx, u64 flags)
9555 	 * (callback_fn)(struct task_struct *task,
9556 	 *               struct vm_area_struct *vma, void *callback_ctx);
9557 	 */
9558 	callee->regs[BPF_REG_1] = caller->regs[BPF_REG_1];
9559 
9560 	callee->regs[BPF_REG_2].type = PTR_TO_BTF_ID;
9561 	__mark_reg_known_zero(&callee->regs[BPF_REG_2]);
9562 	callee->regs[BPF_REG_2].btf =  btf_vmlinux;
9563 	callee->regs[BPF_REG_2].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_VMA];
9564 
9565 	/* pointer to stack or null */
9566 	callee->regs[BPF_REG_3] = caller->regs[BPF_REG_4];
9567 
9568 	/* unused */
9569 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
9570 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
9571 	callee->in_callback_fn = true;
9572 	callee->callback_ret_range = retval_range(0, 1);
9573 	return 0;
9574 }
9575 
9576 static int set_user_ringbuf_callback_state(struct bpf_verifier_env *env,
9577 					   struct bpf_func_state *caller,
9578 					   struct bpf_func_state *callee,
9579 					   int insn_idx)
9580 {
9581 	/* bpf_user_ringbuf_drain(struct bpf_map *map, void *callback_fn, void
9582 	 *			  callback_ctx, u64 flags);
9583 	 * callback_fn(const struct bpf_dynptr_t* dynptr, void *callback_ctx);
9584 	 */
9585 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_0]);
9586 	mark_dynptr_cb_reg(env, &callee->regs[BPF_REG_1], BPF_DYNPTR_TYPE_LOCAL);
9587 	callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3];
9588 
9589 	/* unused */
9590 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
9591 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
9592 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
9593 
9594 	callee->in_callback_fn = true;
9595 	callee->callback_ret_range = retval_range(0, 1);
9596 	return 0;
9597 }
9598 
9599 static int set_rbtree_add_callback_state(struct bpf_verifier_env *env,
9600 					 struct bpf_func_state *caller,
9601 					 struct bpf_func_state *callee,
9602 					 int insn_idx)
9603 {
9604 	/* void bpf_rbtree_add_impl(struct bpf_rb_root *root, struct bpf_rb_node *node,
9605 	 *                     bool (less)(struct bpf_rb_node *a, const struct bpf_rb_node *b));
9606 	 *
9607 	 * 'struct bpf_rb_node *node' arg to bpf_rbtree_add_impl is the same PTR_TO_BTF_ID w/ offset
9608 	 * that 'less' callback args will be receiving. However, 'node' arg was release_reference'd
9609 	 * by this point, so look at 'root'
9610 	 */
9611 	struct btf_field *field;
9612 
9613 	field = reg_find_field_offset(&caller->regs[BPF_REG_1],
9614 				      caller->regs[BPF_REG_1].var_off.value,
9615 				      BPF_RB_ROOT);
9616 	if (!field || !field->graph_root.value_btf_id)
9617 		return -EFAULT;
9618 
9619 	mark_reg_graph_node(callee->regs, BPF_REG_1, &field->graph_root);
9620 	ref_set_non_owning(env, &callee->regs[BPF_REG_1]);
9621 	mark_reg_graph_node(callee->regs, BPF_REG_2, &field->graph_root);
9622 	ref_set_non_owning(env, &callee->regs[BPF_REG_2]);
9623 
9624 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
9625 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
9626 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
9627 	callee->in_callback_fn = true;
9628 	callee->callback_ret_range = retval_range(0, 1);
9629 	return 0;
9630 }
9631 
9632 static int set_task_work_schedule_callback_state(struct bpf_verifier_env *env,
9633 						 struct bpf_func_state *caller,
9634 						 struct bpf_func_state *callee,
9635 						 int insn_idx)
9636 {
9637 	struct bpf_map *map_ptr = caller->regs[BPF_REG_3].map_ptr;
9638 
9639 	/*
9640 	 * callback_fn(struct bpf_map *map, void *key, void *value);
9641 	 */
9642 	callee->regs[BPF_REG_1].type = CONST_PTR_TO_MAP;
9643 	__mark_reg_known_zero(&callee->regs[BPF_REG_1]);
9644 	callee->regs[BPF_REG_1].map_ptr = map_ptr;
9645 
9646 	callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
9647 	__mark_reg_known_zero(&callee->regs[BPF_REG_2]);
9648 	callee->regs[BPF_REG_2].map_ptr = map_ptr;
9649 
9650 	callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
9651 	__mark_reg_known_zero(&callee->regs[BPF_REG_3]);
9652 	callee->regs[BPF_REG_3].map_ptr = map_ptr;
9653 
9654 	/* unused */
9655 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
9656 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
9657 	callee->in_async_callback_fn = true;
9658 	callee->callback_ret_range = retval_range(S32_MIN, S32_MAX);
9659 	return 0;
9660 }
9661 
9662 static bool is_rbtree_lock_required_kfunc(u32 btf_id);
9663 
9664 /* Are we currently verifying the callback for a rbtree helper that must
9665  * be called with lock held? If so, no need to complain about unreleased
9666  * lock
9667  */
9668 static bool in_rbtree_lock_required_cb(struct bpf_verifier_env *env)
9669 {
9670 	struct bpf_verifier_state *state = env->cur_state;
9671 	struct bpf_insn *insn = env->prog->insnsi;
9672 	struct bpf_func_state *callee;
9673 	int kfunc_btf_id;
9674 
9675 	if (!state->curframe)
9676 		return false;
9677 
9678 	callee = state->frame[state->curframe];
9679 
9680 	if (!callee->in_callback_fn)
9681 		return false;
9682 
9683 	kfunc_btf_id = insn[callee->callsite].imm;
9684 	return is_rbtree_lock_required_kfunc(kfunc_btf_id);
9685 }
9686 
9687 static bool retval_range_within(struct bpf_retval_range range, const struct bpf_reg_state *reg)
9688 {
9689 	if (range.return_32bit)
9690 		return range.minval <= reg_s32_min(reg) && reg_s32_max(reg) <= range.maxval;
9691 	else
9692 		return range.minval <= reg_smin(reg) && reg_smax(reg) <= range.maxval;
9693 }
9694 
9695 static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
9696 {
9697 	struct bpf_verifier_state *state = env->cur_state, *prev_st;
9698 	struct bpf_func_state *caller, *callee;
9699 	struct bpf_reg_state *r0;
9700 	bool in_callback_fn;
9701 	int err;
9702 
9703 	callee = state->frame[state->curframe];
9704 	r0 = &callee->regs[BPF_REG_0];
9705 	if (r0->type == PTR_TO_STACK) {
9706 		/* technically it's ok to return caller's stack pointer
9707 		 * (or caller's caller's pointer) back to the caller,
9708 		 * since these pointers are valid. Only current stack
9709 		 * pointer will be invalid as soon as function exits,
9710 		 * but let's be conservative
9711 		 */
9712 		verbose(env, "cannot return stack pointer to the caller\n");
9713 		return -EINVAL;
9714 	}
9715 
9716 	caller = state->frame[state->curframe - 1];
9717 	if (callee->in_callback_fn) {
9718 		if (r0->type != SCALAR_VALUE) {
9719 			verbose(env, "R0 not a scalar value\n");
9720 			return -EACCES;
9721 		}
9722 
9723 		/* we are going to rely on register's precise value */
9724 		err = mark_chain_precision(env, BPF_REG_0);
9725 		if (err)
9726 			return err;
9727 
9728 		/* enforce R0 return value range, and bpf_callback_t returns 64bit */
9729 		if (!retval_range_within(callee->callback_ret_range, r0)) {
9730 			verbose_invalid_scalar(env, r0, callee->callback_ret_range,
9731 					       "At callback return", "R0");
9732 			return -EINVAL;
9733 		}
9734 		if (!bpf_calls_callback(env, callee->callsite)) {
9735 			verifier_bug(env, "in callback at %d, callsite %d !calls_callback",
9736 				     *insn_idx, callee->callsite);
9737 			return -EFAULT;
9738 		}
9739 	} else {
9740 		/* return to the caller whatever r0 had in the callee */
9741 		caller->regs[BPF_REG_0] = *r0;
9742 	}
9743 
9744 	/* for callbacks like bpf_loop or bpf_for_each_map_elem go back to callsite,
9745 	 * there function call logic would reschedule callback visit. If iteration
9746 	 * converges is_state_visited() would prune that visit eventually.
9747 	 */
9748 	in_callback_fn = callee->in_callback_fn;
9749 	if (in_callback_fn)
9750 		*insn_idx = callee->callsite;
9751 	else
9752 		*insn_idx = callee->callsite + 1;
9753 
9754 	if (env->log.level & BPF_LOG_LEVEL) {
9755 		verbose(env, "returning from callee:\n");
9756 		print_verifier_state(env, state, callee->frameno, true);
9757 		verbose(env, "to caller at %d:\n", *insn_idx);
9758 		print_verifier_state(env, state, caller->frameno, true);
9759 	}
9760 	/* clear everything in the callee. In case of exceptional exits using
9761 	 * bpf_throw, this will be done by copy_verifier_state for extra frames. */
9762 	free_func_state(callee);
9763 	state->frame[state->curframe--] = NULL;
9764 	invalidate_outgoing_stack_args(env, caller);
9765 
9766 	/* for callbacks widen imprecise scalars to make programs like below verify:
9767 	 *
9768 	 *   struct ctx { int i; }
9769 	 *   void cb(int idx, struct ctx *ctx) { ctx->i++; ... }
9770 	 *   ...
9771 	 *   struct ctx = { .i = 0; }
9772 	 *   bpf_loop(100, cb, &ctx, 0);
9773 	 *
9774 	 * This is similar to what is done in process_iter_next_call() for open
9775 	 * coded iterators.
9776 	 */
9777 	prev_st = in_callback_fn ? find_prev_entry(env, state, *insn_idx) : NULL;
9778 	if (prev_st) {
9779 		err = widen_imprecise_scalars(env, prev_st, state);
9780 		if (err)
9781 			return err;
9782 	}
9783 	return 0;
9784 }
9785 
9786 static int do_refine_retval_range(struct bpf_verifier_env *env,
9787 				  struct bpf_reg_state *regs, int ret_type,
9788 				  int func_id,
9789 				  struct bpf_call_arg_meta *meta)
9790 {
9791 	struct bpf_retval_range range;
9792 	struct bpf_reg_state *ret_reg = &regs[BPF_REG_0];
9793 	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
9794 
9795 	if (ret_type != RET_INTEGER)
9796 		return 0;
9797 
9798 	switch (func_id) {
9799 	case BPF_FUNC_get_stack:
9800 	case BPF_FUNC_get_task_stack:
9801 	case BPF_FUNC_probe_read_str:
9802 	case BPF_FUNC_probe_read_kernel_str:
9803 	case BPF_FUNC_probe_read_user_str:
9804 		reg_set_srange64(ret_reg, -MAX_ERRNO, meta->msize_max_value);
9805 		reg_set_srange32(ret_reg, -MAX_ERRNO, meta->msize_max_value);
9806 		reg_bounds_sync(ret_reg);
9807 		break;
9808 	case BPF_FUNC_get_smp_processor_id:
9809 		reg_set_urange64(ret_reg, 0, nr_cpu_ids - 1);
9810 		reg_set_urange32(ret_reg, 0, nr_cpu_ids - 1);
9811 		reg_bounds_sync(ret_reg);
9812 		break;
9813 	case BPF_FUNC_get_retval:
9814 		/*
9815 		 * bpf_get_retval may see arbitrary value passed by bpf_prog_run_array_cg for
9816 		 * CGROUP_GETSOCKOPT type.
9817 		 */
9818 		if (prog_type == BPF_PROG_TYPE_CGROUP_SOCKOPT &&
9819 		    env->prog->expected_attach_type == BPF_CGROUP_GETSOCKOPT)
9820 			break;
9821 
9822 		if (prog_type == BPF_PROG_TYPE_LSM &&
9823 		    env->prog->expected_attach_type == BPF_LSM_CGROUP) {
9824 			if (!env->prog->aux->attach_func_proto->type)
9825 				break;
9826 			bpf_lsm_get_retval_range(env->prog, &range);
9827 		} else {
9828 			range.minval = -MAX_ERRNO;
9829 			range.maxval = 0;
9830 		}
9831 
9832 		reg_set_srange64(ret_reg, range.minval, range.maxval);
9833 		reg_set_srange32(ret_reg, range.minval, range.maxval);
9834 		reg_bounds_sync(ret_reg);
9835 		break;
9836 	}
9837 
9838 	return reg_bounds_sanity_check(env, ret_reg, "retval");
9839 }
9840 
9841 static int
9842 record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
9843 		int func_id, int insn_idx)
9844 {
9845 	struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
9846 	struct bpf_map *map = meta->map.ptr;
9847 
9848 	if (func_id != BPF_FUNC_tail_call &&
9849 	    func_id != BPF_FUNC_map_lookup_elem &&
9850 	    func_id != BPF_FUNC_map_update_elem &&
9851 	    func_id != BPF_FUNC_map_delete_elem &&
9852 	    func_id != BPF_FUNC_map_push_elem &&
9853 	    func_id != BPF_FUNC_map_pop_elem &&
9854 	    func_id != BPF_FUNC_map_peek_elem &&
9855 	    func_id != BPF_FUNC_for_each_map_elem &&
9856 	    func_id != BPF_FUNC_redirect_map &&
9857 	    func_id != BPF_FUNC_map_lookup_percpu_elem)
9858 		return 0;
9859 
9860 	if (map == NULL) {
9861 		verifier_bug(env, "expected map for helper call");
9862 		return -EFAULT;
9863 	}
9864 
9865 	/* In case of read-only, some additional restrictions
9866 	 * need to be applied in order to prevent altering the
9867 	 * state of the map from program side.
9868 	 */
9869 	if ((map->map_flags & BPF_F_RDONLY_PROG) &&
9870 	    (func_id == BPF_FUNC_map_delete_elem ||
9871 	     func_id == BPF_FUNC_map_update_elem ||
9872 	     func_id == BPF_FUNC_map_push_elem ||
9873 	     func_id == BPF_FUNC_map_pop_elem)) {
9874 		verbose(env, "write into map forbidden\n");
9875 		return -EACCES;
9876 	}
9877 
9878 	if (!aux->map_ptr_state.map_ptr)
9879 		bpf_map_ptr_store(aux, meta->map.ptr,
9880 				  !meta->map.ptr->bypass_spec_v1, false);
9881 	else if (aux->map_ptr_state.map_ptr != meta->map.ptr)
9882 		bpf_map_ptr_store(aux, meta->map.ptr,
9883 				  !meta->map.ptr->bypass_spec_v1, true);
9884 	return 0;
9885 }
9886 
9887 static int
9888 record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
9889 		int func_id, int insn_idx)
9890 {
9891 	struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
9892 	struct bpf_reg_state *reg;
9893 	struct bpf_map *map = meta->map.ptr;
9894 	u64 val, max;
9895 	int err;
9896 
9897 	if (func_id != BPF_FUNC_tail_call)
9898 		return 0;
9899 	if (!map || map->map_type != BPF_MAP_TYPE_PROG_ARRAY) {
9900 		verbose(env, "expected prog array map for tail call");
9901 		return -EINVAL;
9902 	}
9903 
9904 	reg = reg_state(env, BPF_REG_3);
9905 	val = reg->var_off.value;
9906 	max = map->max_entries;
9907 
9908 	if (!(is_reg_const(reg, false) && val < max)) {
9909 		bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
9910 		return 0;
9911 	}
9912 
9913 	err = mark_chain_precision(env, BPF_REG_3);
9914 	if (err)
9915 		return err;
9916 	if (bpf_map_key_unseen(aux))
9917 		bpf_map_key_store(aux, val);
9918 	else if (!bpf_map_key_poisoned(aux) &&
9919 		  bpf_map_key_immediate(aux) != val)
9920 		bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
9921 	return 0;
9922 }
9923 
9924 static int check_reference_leak(struct bpf_verifier_env *env, bool exception_exit)
9925 {
9926 	struct bpf_verifier_state *state = env->cur_state;
9927 	enum bpf_prog_type type = resolve_prog_type(env->prog);
9928 	struct bpf_reg_state *reg = reg_state(env, BPF_REG_0);
9929 	bool refs_lingering = false;
9930 	int i;
9931 
9932 	if (!exception_exit && cur_func(env)->frameno)
9933 		return 0;
9934 
9935 	for (i = 0; i < state->acquired_refs; i++) {
9936 		if (state->refs[i].type != REF_TYPE_PTR)
9937 			continue;
9938 		/* Allow struct_ops programs to return a referenced kptr back to
9939 		 * kernel. Type checks are performed later in check_return_code.
9940 		 */
9941 		if (type == BPF_PROG_TYPE_STRUCT_OPS && !exception_exit &&
9942 		    reg->id == state->refs[i].id)
9943 			continue;
9944 		verbose(env, "Unreleased reference id=%d alloc_insn=%d\n",
9945 			state->refs[i].id, state->refs[i].insn_idx);
9946 		refs_lingering = true;
9947 	}
9948 	return refs_lingering ? -EINVAL : 0;
9949 }
9950 
9951 static int check_resource_leak(struct bpf_verifier_env *env, bool exception_exit, bool check_lock, const char *prefix)
9952 {
9953 	int err;
9954 
9955 	if (check_lock && env->cur_state->active_locks) {
9956 		verbose(env, "%s cannot be used inside bpf_spin_lock-ed region\n", prefix);
9957 		return -EINVAL;
9958 	}
9959 
9960 	err = check_reference_leak(env, exception_exit);
9961 	if (err) {
9962 		verbose(env, "%s would lead to reference leak\n", prefix);
9963 		return err;
9964 	}
9965 
9966 	if (check_lock && env->cur_state->active_irq_id) {
9967 		verbose(env, "%s cannot be used inside bpf_local_irq_save-ed region\n", prefix);
9968 		return -EINVAL;
9969 	}
9970 
9971 	if (check_lock && env->cur_state->active_rcu_locks) {
9972 		verbose(env, "%s cannot be used inside bpf_rcu_read_lock-ed region\n", prefix);
9973 		return -EINVAL;
9974 	}
9975 
9976 	if (check_lock && env->cur_state->active_preempt_locks) {
9977 		verbose(env, "%s cannot be used inside bpf_preempt_disable-ed region\n", prefix);
9978 		return -EINVAL;
9979 	}
9980 
9981 	return 0;
9982 }
9983 
9984 static int check_bpf_snprintf_call(struct bpf_verifier_env *env,
9985 				   struct bpf_reg_state *regs)
9986 {
9987 	struct bpf_reg_state *fmt_reg = &regs[BPF_REG_3];
9988 	struct bpf_reg_state *data_len_reg = &regs[BPF_REG_5];
9989 	struct bpf_map *fmt_map = fmt_reg->map_ptr;
9990 	struct bpf_bprintf_data data = {};
9991 	int err, fmt_map_off, num_args;
9992 	u64 fmt_addr;
9993 	char *fmt;
9994 
9995 	/* data must be an array of u64 */
9996 	if (data_len_reg->var_off.value % 8)
9997 		return -EINVAL;
9998 	num_args = data_len_reg->var_off.value / 8;
9999 
10000 	/* fmt being ARG_PTR_TO_CONST_STR guarantees that var_off is const
10001 	 * and map_direct_value_addr is set.
10002 	 */
10003 	fmt_map_off = fmt_reg->var_off.value;
10004 	err = fmt_map->ops->map_direct_value_addr(fmt_map, &fmt_addr,
10005 						  fmt_map_off);
10006 	if (err) {
10007 		verbose(env, "failed to retrieve map value address\n");
10008 		return -EFAULT;
10009 	}
10010 	fmt = (char *)(long)fmt_addr + fmt_map_off;
10011 
10012 	/* We are also guaranteed that fmt+fmt_map_off is NULL terminated, we
10013 	 * can focus on validating the format specifiers.
10014 	 */
10015 	err = bpf_bprintf_prepare(fmt, UINT_MAX, NULL, num_args, &data);
10016 	if (err < 0)
10017 		verbose(env, "Invalid format string\n");
10018 
10019 	return err;
10020 }
10021 
10022 static int check_get_func_ip(struct bpf_verifier_env *env)
10023 {
10024 	enum bpf_prog_type type = resolve_prog_type(env->prog);
10025 	int func_id = BPF_FUNC_get_func_ip;
10026 
10027 	if (type == BPF_PROG_TYPE_TRACING) {
10028 		if (!bpf_prog_has_trampoline(env->prog)) {
10029 			verbose(env, "func %s#%d supported only for fentry/fexit/fsession/fmod_ret programs\n",
10030 				func_id_name(func_id), func_id);
10031 			return -ENOTSUPP;
10032 		}
10033 		return 0;
10034 	} else if (type == BPF_PROG_TYPE_KPROBE) {
10035 		return 0;
10036 	}
10037 
10038 	verbose(env, "func %s#%d not supported for program type %d\n",
10039 		func_id_name(func_id), func_id, type);
10040 	return -ENOTSUPP;
10041 }
10042 
10043 static struct bpf_insn_aux_data *cur_aux(const struct bpf_verifier_env *env)
10044 {
10045 	return &env->insn_aux_data[env->insn_idx];
10046 }
10047 
10048 static bool loop_flag_is_zero(struct bpf_verifier_env *env)
10049 {
10050 	struct bpf_reg_state *reg = reg_state(env, BPF_REG_4);
10051 	bool reg_is_null = bpf_register_is_null(reg);
10052 
10053 	if (reg_is_null)
10054 		mark_chain_precision(env, BPF_REG_4);
10055 
10056 	return reg_is_null;
10057 }
10058 
10059 static void update_loop_inline_state(struct bpf_verifier_env *env, u32 subprogno)
10060 {
10061 	struct bpf_loop_inline_state *state = &cur_aux(env)->loop_inline_state;
10062 
10063 	if (!state->initialized) {
10064 		state->initialized = 1;
10065 		state->fit_for_inline = loop_flag_is_zero(env);
10066 		state->callback_subprogno = subprogno;
10067 		return;
10068 	}
10069 
10070 	if (!state->fit_for_inline)
10071 		return;
10072 
10073 	state->fit_for_inline = (loop_flag_is_zero(env) &&
10074 				 state->callback_subprogno == subprogno);
10075 }
10076 
10077 /* Returns whether or not the given map can potentially elide
10078  * lookup return value nullness check. This is possible if the key
10079  * is statically known.
10080  */
10081 static bool can_elide_value_nullness(const struct bpf_map *map)
10082 {
10083 	if (map->map_flags & BPF_F_INNER_MAP)
10084 		return false;
10085 
10086 	switch (map->map_type) {
10087 	case BPF_MAP_TYPE_ARRAY:
10088 	case BPF_MAP_TYPE_PERCPU_ARRAY:
10089 		return true;
10090 	default:
10091 		return false;
10092 	}
10093 }
10094 
10095 int bpf_get_helper_proto(struct bpf_verifier_env *env, int func_id,
10096 			 const struct bpf_func_proto **ptr)
10097 {
10098 	if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID)
10099 		return -ERANGE;
10100 
10101 	if (!env->ops->get_func_proto)
10102 		return -EINVAL;
10103 
10104 	*ptr = env->ops->get_func_proto(func_id, env->prog);
10105 	return *ptr && (*ptr)->func ? 0 : -EINVAL;
10106 }
10107 
10108 /* Check if we're in a sleepable context. */
10109 static inline bool in_sleepable_context(struct bpf_verifier_env *env)
10110 {
10111 	return !env->cur_state->active_rcu_locks &&
10112 	       !env->cur_state->active_preempt_locks &&
10113 	       !env->cur_state->active_locks &&
10114 	       !env->cur_state->active_irq_id &&
10115 	       in_sleepable(env);
10116 }
10117 
10118 static const char *non_sleepable_context_description(struct bpf_verifier_env *env)
10119 {
10120 	if (env->cur_state->active_rcu_locks)
10121 		return "rcu_read_lock region";
10122 	if (env->cur_state->active_preempt_locks)
10123 		return "non-preemptible region";
10124 	if (env->cur_state->active_irq_id)
10125 		return "IRQ-disabled region";
10126 	if (env->cur_state->active_locks)
10127 		return "lock region";
10128 	return "non-sleepable prog";
10129 }
10130 
10131 static int release_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
10132 		       bool convert_rcu, bool release_dynptr)
10133 {
10134 	int err = -EINVAL;
10135 
10136 	if (bpf_register_is_null(reg))
10137 		return 0;
10138 
10139 	if (release_dynptr)
10140 		err = unmark_stack_slots_dynptr(env, reg);
10141 	else if (convert_rcu)
10142 		err = ref_convert_alloc_rcu_protected(env, reg->id);
10143 	else if (reg_is_referenced(env, reg))
10144 		err = release_reference(env, reg->id);
10145 
10146 	return err;
10147 }
10148 
/* Verify a single BPF helper call instruction.
 *
 * @env:        verifier environment
 * @insn:       the call instruction; insn->imm is taken as the helper id
 * @insn_idx_p: points at the index of @insn (only read by this function)
 *
 * In order, the function: looks up the helper prototype, applies the
 * GPL / fn->allowed / sleepable gating, checks every argument register,
 * processes a release argument if the prototype has one, runs the
 * helper-specific checks in the func_id switch, resets the caller-saved
 * registers, sets up R0 from the prototype's return type, and finally deals
 * with reference ids, return range refinement, map compatibility,
 * callchain buffers and tail calls.
 *
 * Returns 0 on success or a negative errno.
 */
10149 static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
10150 			     int *insn_idx_p)
10151 {
10152 	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
10153 	bool returns_cpu_specific_alloc_ptr = false;
10154 	const struct bpf_func_proto *fn = NULL;
10155 	enum bpf_return_type ret_type;
10156 	enum bpf_type_flag ret_flag;
10157 	struct bpf_reg_state *regs;
10158 	struct bpf_call_arg_meta meta;
10159 	int insn_idx = *insn_idx_p;
10160 	bool changes_data;
10161 	int i, err, func_id;
10162 
10163 	/* find function prototype */
10164 	func_id = insn->imm;
10165 	err = bpf_get_helper_proto(env, insn->imm, &fn);
	/* -ERANGE is reported as an invalid helper id and mapped to -EINVAL;
	 * any other error is reported as "not usable by this program type"
	 * and returned unchanged.
	 */
10166 	if (err == -ERANGE) {
10167 		verbose(env, "invalid func %s#%d\n", func_id_name(func_id), func_id);
10168 		return -EINVAL;
10169 	}
10170 
10171 	if (err) {
10172 		verbose(env, "program of this type cannot use helper %s#%d\n",
10173 			func_id_name(func_id), func_id);
10174 		return err;
10175 	}
10176 
10177 	/* eBPF programs must be GPL compatible to use GPL-ed functions */
10178 	if (!env->prog->gpl_compatible && fn->gpl_only) {
10179 		verbose(env, "cannot call GPL-restricted function from non-GPL compatible program\n");
10180 		return -EINVAL;
10181 	}
10182 
	/* Optional per-helper predicate on the program. */
10183 	if (fn->allowed && !fn->allowed(env->prog)) {
10184 		verbose(env, "helper call is not allowed in probe\n");
10185 		return -EINVAL;
10186 	}
10187 
10188 	/* With LD_ABS/IND some JITs save/restore skb from r1. */
10189 	changes_data = bpf_helper_changes_pkt_data(func_id);
10190 	if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) {
10191 		verifier_bug(env, "func %s#%d: r1 != ctx", func_id_name(func_id), func_id);
10192 		return -EFAULT;
10193 	}
10194 
10195 	memset(&meta, 0, sizeof(meta));
10196 	meta.pkt_access = fn->pkt_access;
10197 
10198 	err = check_func_proto(fn, &meta);
10199 	if (err) {
10200 		verifier_bug(env, "incorrect func proto %s#%d", func_id_name(func_id), func_id);
10201 		return err;
10202 	}
10203 
10204 	if (fn->might_sleep && !in_sleepable_context(env)) {
10205 		verbose(env, "sleepable helper %s#%d in %s\n", func_id_name(func_id), func_id,
10206 			non_sleepable_context_description(env));
10207 		return -EINVAL;
10208 	}
10209 
10210 	/* Track non-sleepable context for helpers. */
10211 	if (!in_sleepable_context(env))
10212 		env->insn_aux_data[insn_idx].non_sleepable = true;
10213 
10214 	meta.func_id = func_id;
10215 	/* check args */
10216 	for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) {
10217 		err = check_func_arg(env, i, &meta, fn, insn_idx);
10218 		if (err)
10219 			return err;
10220 	}
10221 
10222 	err = record_func_map(env, &meta, func_id, insn_idx);
10223 	if (err)
10224 		return err;
10225 
10226 	err = record_func_key(env, &meta, func_id, insn_idx);
10227 	if (err)
10228 		return err;
10229 
10230 	regs = cur_regs(env);
10231 
10232 	/* Mark slots with STACK_MISC in case of raw mode, stack offset
10233 	 * is inferred from register state.
10234 	 */
10235 	for (i = 0; i < meta.access_size; i++) {
10236 		err = check_mem_access(env, insn_idx, regs + meta.regno, argno_from_reg(meta.regno), i, BPF_B,
10237 				       BPF_WRITE, -1, false, false);
10238 		if (err)
10239 			return err;
10240 	}
10241 
	/* One of the arguments was flagged as a release argument while
	 * checking the arguments. convert_rcu is only set for
	 * bpf_kptr_xchg() inside an RCU critical section when the register
	 * type carries both MEM_ALLOC and MEM_PERCPU; a non-zero
	 * meta.dynptr.id selects the dynptr release path.
	 */
10242 	if (meta.release_regno) {
10243 		struct bpf_reg_state *reg = &regs[meta.release_regno];
10244 		bool convert_rcu = (func_id == BPF_FUNC_kptr_xchg) && in_rcu_cs(env) &&
10245 				   (reg->type & MEM_ALLOC) && (reg->type & MEM_PERCPU);
10246 
10247 		err = release_reg(env, reg, convert_rcu, !!meta.dynptr.id);
10248 		if (err)
10249 			return err;
10250 	}
10251 
	/* Helper-specific checks. Cases that only assign err rely on the
	 * common "if (err)" test right after the switch.
	 */
10252 	switch (func_id) {
10253 	case BPF_FUNC_tail_call:
10254 		err = check_resource_leak(env, false, true, "tail_call");
10255 		if (err)
10256 			return err;
10257 		break;
10258 	case BPF_FUNC_get_local_storage:
10259 		/* check that flags argument in get_local_storage(map, flags) is 0,
10260 		 * this is required because get_local_storage() can't return an error.
10261 		 */
10262 		if (!bpf_register_is_null(&regs[BPF_REG_2])) {
10263 			verbose(env, "get_local_storage() doesn't support non-zero flags\n");
10264 			return -EINVAL;
10265 		}
10266 		break;
10267 	case BPF_FUNC_for_each_map_elem:
10268 		err = push_callback_call(env, insn, insn_idx, meta.subprogno,
10269 					 set_map_elem_callback_state);
10270 		break;
10271 	case BPF_FUNC_timer_set_callback:
10272 		err = push_callback_call(env, insn, insn_idx, meta.subprogno,
10273 					 set_timer_callback_state);
10274 		break;
10275 	case BPF_FUNC_find_vma:
10276 		err = push_callback_call(env, insn, insn_idx, meta.subprogno,
10277 					 set_find_vma_callback_state);
10278 		break;
10279 	case BPF_FUNC_snprintf:
10280 		err = check_bpf_snprintf_call(env, regs);
10281 		break;
10282 	case BPF_FUNC_loop:
10283 		update_loop_inline_state(env, meta.subprogno);
10284 		/* Verifier relies on R1 value to determine if bpf_loop() iteration
10285 		 * is finished, thus mark it precise.
10286 		 */
10287 		err = mark_chain_precision(env, BPF_REG_1);
10288 		if (err)
10289 			return err;
10290 		if (cur_func(env)->callback_depth < reg_umax(&regs[BPF_REG_1])) {
10291 			err = push_callback_call(env, insn, insn_idx, meta.subprogno,
10292 						 set_loop_callback_state);
10293 		} else {
10294 			cur_func(env)->callback_depth = 0;
10295 			if (env->log.level & BPF_LOG_LEVEL2)
10296 				verbose(env, "frame%d bpf_loop iteration limit reached\n",
10297 					env->cur_state->curframe);
10298 		}
10299 		break;
10300 	case BPF_FUNC_dynptr_from_mem:
10301 		if (regs[BPF_REG_1].type != PTR_TO_MAP_VALUE) {
10302 			verbose(env, "Unsupported reg type %s for bpf_dynptr_from_mem data\n",
10303 				reg_type_str(env, regs[BPF_REG_1].type));
10304 			return -EACCES;
10305 		}
10306 		break;
10307 	case BPF_FUNC_set_retval:
10308 	{
		/* Default accepted range for R1 is [-MAX_ERRNO, 0]; it may be
		 * replaced by bpf_lsm_get_retval_range() below.
		 */
10309 		struct bpf_retval_range range = {
10310 			.minval = -MAX_ERRNO,
10311 			.maxval = 0,
10312 			.return_32bit = true
10313 		};
10314 		struct bpf_reg_state *r1 = &regs[BPF_REG_1];
10315 
10316 		if (r1->type != SCALAR_VALUE) {
10317 			verbose(env, "R1 is not a scalar\n");
10318 			return -EINVAL;
10319 		}
10320 
10321 		/* CGROUP_GETSOCKOPT is allowed to return arbitrary value */
10322 		if (prog_type == BPF_PROG_TYPE_CGROUP_SOCKOPT &&
10323 		    env->prog->expected_attach_type == BPF_CGROUP_GETSOCKOPT)
10324 			break;
10325 
10326 		if (prog_type == BPF_PROG_TYPE_LSM &&
10327 		    env->prog->expected_attach_type == BPF_LSM_CGROUP) {
10328 			if (!env->prog->aux->attach_func_proto->type) {
10329 				/* Make sure programs that attach to void
10330 				 * hooks don't try to modify return value.
10331 				 */
10332 				verbose(env, "BPF_LSM_CGROUP that attach to void LSM hooks can't modify return value!\n");
10333 				return -EINVAL;
10334 			}
10335 			bpf_lsm_get_retval_range(env->prog, &range);
10336 		}
10337 
10338 		err = mark_chain_precision(env, BPF_REG_1);
10339 		if (err)
10340 			return err;
10341 
10342 		if (!retval_range_within(range, r1)) {
10343 			verbose_invalid_scalar(env, r1, range, "At bpf_set_retval", "R1");
10344 			return -EINVAL;
10345 		}
10346 
10347 		break;
10348 	}
10349 	case BPF_FUNC_dynptr_write:
10350 	{
10351 		enum bpf_dynptr_type dynptr_type = meta.dynptr.type;
10352 
10353 		if (dynptr_type == BPF_DYNPTR_TYPE_INVALID)
10354 			return -EFAULT;
10355 
10356 		if (dynptr_type == BPF_DYNPTR_TYPE_SKB ||
10357 		    dynptr_type == BPF_DYNPTR_TYPE_SKB_META)
10358 			/* this will trigger clear_all_pkt_pointers(), which will
10359 			 * invalidate all dynptr slices associated with the skb
10360 			 */
10361 			changes_data = true;
10362 
10363 		break;
10364 	}
10365 	case BPF_FUNC_per_cpu_ptr:
10366 	case BPF_FUNC_this_cpu_ptr:
10367 	{
10368 		struct bpf_reg_state *reg = &regs[BPF_REG_1];
10369 		const struct btf_type *type;
10370 
		/* An R1 flagged MEM_RCU must point at a BTF struct; the flag
		 * set here changes the R0 type chosen in the
		 * RET_PTR_TO_MEM_OR_BTF_ID case below.
		 */
10371 		if (reg->type & MEM_RCU) {
10372 			type = btf_type_by_id(reg->btf, reg->btf_id);
10373 			if (!type || !btf_type_is_struct(type)) {
10374 				verbose(env, "Helper has invalid btf/btf_id in R1\n");
10375 				return -EFAULT;
10376 			}
10377 			returns_cpu_specific_alloc_ptr = true;
10378 			env->insn_aux_data[insn_idx].call_with_percpu_alloc_ptr = true;
10379 		}
10380 		break;
10381 	}
10382 	case BPF_FUNC_user_ringbuf_drain:
10383 		err = push_callback_call(env, insn, insn_idx, meta.subprogno,
10384 					 set_user_ringbuf_callback_state);
10385 		break;
10386 	}
10387 
10388 	if (err)
10389 		return err;
10390 
10391 	/* reset caller saved regs */
10392 	for (i = 0; i < CALLER_SAVED_REGS; i++) {
10393 		bpf_mark_reg_not_init(env, &regs[caller_saved[i]]);
10394 		check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
10395 	}
10396 	invalidate_outgoing_stack_args(env, cur_func(env));
10397 
10398 	/* helper call returns 64-bit value. */
10399 	regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
10400 
10401 	/* update return register (already marked as written above) */
10402 	ret_type = fn->ret_type;
10403 	ret_flag = type_flag(ret_type);
10404 
10405 	switch (base_type(ret_type)) {
10406 	case RET_INTEGER:
10407 		/* sets type to SCALAR_VALUE */
10408 		mark_reg_unknown(env, regs, BPF_REG_0);
10409 		break;
10410 	case RET_VOID:
10411 		regs[BPF_REG_0].type = NOT_INIT;
10412 		break;
10413 	case RET_PTR_TO_MAP_VALUE:
10414 		/* There is no offset yet applied, variable or fixed */
10415 		mark_reg_known_zero(env, regs, BPF_REG_0);
10416 		/* remember map_ptr, so that check_map_access()
10417 		 * can check 'value_size' boundary of memory access
10418 		 * to map element returned from bpf_map_lookup_elem()
10419 		 */
10420 		if (meta.map.ptr == NULL) {
10421 			verifier_bug(env, "unexpected null map_ptr");
10422 			return -EFAULT;
10423 		}
10424 
		/* Drop PTR_MAYBE_NULL for a lookup with a constant key that
		 * is within max_entries, when can_elide_value_nullness()
		 * accepts the map.
		 */
10425 		if (func_id == BPF_FUNC_map_lookup_elem &&
10426 		    can_elide_value_nullness(meta.map.ptr) &&
10427 		    meta.const_map_key >= 0 &&
10428 		    meta.const_map_key < meta.map.ptr->max_entries)
10429 			ret_flag &= ~PTR_MAYBE_NULL;
10430 
10431 		regs[BPF_REG_0].map_ptr = meta.map.ptr;
10432 		regs[BPF_REG_0].map_uid = meta.map.uid;
10433 		regs[BPF_REG_0].type = PTR_TO_MAP_VALUE | ret_flag;
10434 		if (!type_may_be_null(ret_flag) &&
10435 		    btf_record_has_field(meta.map.ptr->record, BPF_SPIN_LOCK | BPF_RES_SPIN_LOCK)) {
10436 			regs[BPF_REG_0].id = ++env->id_gen;
10437 		}
10438 		break;
10439 	case RET_PTR_TO_SOCKET:
10440 		mark_reg_known_zero(env, regs, BPF_REG_0);
10441 		regs[BPF_REG_0].type = PTR_TO_SOCKET | ret_flag;
10442 		break;
10443 	case RET_PTR_TO_SOCK_COMMON:
10444 		mark_reg_known_zero(env, regs, BPF_REG_0);
10445 		regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON | ret_flag;
10446 		break;
10447 	case RET_PTR_TO_TCP_SOCK:
10448 		mark_reg_known_zero(env, regs, BPF_REG_0);
10449 		regs[BPF_REG_0].type = PTR_TO_TCP_SOCK | ret_flag;
10450 		break;
10451 	case RET_PTR_TO_MEM:
10452 		mark_reg_known_zero(env, regs, BPF_REG_0);
10453 		regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag;
10454 		regs[BPF_REG_0].mem_size = meta.mem_size;
10455 		break;
10456 	case RET_PTR_TO_MEM_OR_BTF_ID:
10457 	{
10458 		const struct btf_type *t;
10459 
10460 		mark_reg_known_zero(env, regs, BPF_REG_0);
10461 		t = btf_type_skip_modifiers(meta.ret_btf, meta.ret_btf_id, NULL);
		/* Non-struct types become PTR_TO_MEM sized by the resolved
		 * type size; struct types become PTR_TO_BTF_ID.
		 */
10462 		if (!btf_type_is_struct(t)) {
10463 			u32 tsize;
10464 			const struct btf_type *ret;
10465 			const char *tname;
10466 
10467 			/* resolve the type size of ksym. */
10468 			ret = btf_resolve_size(meta.ret_btf, t, &tsize);
10469 			if (IS_ERR(ret)) {
10470 				tname = btf_name_by_offset(meta.ret_btf, t->name_off);
10471 				verbose(env, "unable to resolve the size of type '%s': %ld\n",
10472 					tname, PTR_ERR(ret));
10473 				return -EINVAL;
10474 			}
10475 			regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag;
10476 			regs[BPF_REG_0].mem_size = tsize;
10477 		} else {
10478 			if (returns_cpu_specific_alloc_ptr) {
10479 				regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC | MEM_RCU;
10480 			} else {
10481 				/* MEM_RDONLY may be carried from ret_flag, but it
10482 				 * doesn't apply on PTR_TO_BTF_ID. Fold it, otherwise
10483 				 * it will confuse the check of PTR_TO_BTF_ID in
10484 				 * check_mem_access().
10485 				 */
10486 				ret_flag &= ~MEM_RDONLY;
10487 				regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag;
10488 			}
10489 
10490 			regs[BPF_REG_0].btf = meta.ret_btf;
10491 			regs[BPF_REG_0].btf_id = meta.ret_btf_id;
10492 		}
10493 		break;
10494 	}
10495 	case RET_PTR_TO_BTF_ID:
10496 	{
10497 		struct btf *ret_btf;
10498 		int ret_btf_id;
10499 
10500 		mark_reg_known_zero(env, regs, BPF_REG_0);
10501 		regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag;
		/* bpf_kptr_xchg() takes the BTF type from the kptr field;
		 * every other helper takes it from the prototype, relative to
		 * btf_vmlinux.
		 */
10502 		if (func_id == BPF_FUNC_kptr_xchg) {
10503 			ret_btf = meta.kptr_field->kptr.btf;
10504 			ret_btf_id = meta.kptr_field->kptr.btf_id;
10505 			if (!btf_is_kernel(ret_btf)) {
10506 				regs[BPF_REG_0].type |= MEM_ALLOC;
10507 				if (meta.kptr_field->type == BPF_KPTR_PERCPU)
10508 					regs[BPF_REG_0].type |= MEM_PERCPU;
10509 			}
10510 		} else {
10511 			if (fn->ret_btf_id == BPF_PTR_POISON) {
10512 				verifier_bug(env, "func %s has non-overwritten BPF_PTR_POISON return type",
10513 					     func_id_name(func_id));
10514 				return -EFAULT;
10515 			}
10516 			ret_btf = btf_vmlinux;
10517 			ret_btf_id = *fn->ret_btf_id;
10518 		}
10519 		if (ret_btf_id == 0) {
10520 			verbose(env, "invalid return type %u of func %s#%d\n",
10521 				base_type(ret_type), func_id_name(func_id),
10522 				func_id);
10523 			return -EINVAL;
10524 		}
10525 		regs[BPF_REG_0].btf = ret_btf;
10526 		regs[BPF_REG_0].btf_id = ret_btf_id;
10527 		break;
10528 	}
10529 	default:
10530 		verbose(env, "unknown return type %u of func %s#%d\n",
10531 			base_type(ret_type), func_id_name(func_id), func_id);
10532 		return -EINVAL;
10533 	}
10534 
	/* A possibly-NULL R0 gets a fresh id from env->id_gen. */
10535 	if (type_may_be_null(regs[BPF_REG_0].type))
10536 		regs[BPF_REG_0].id = ++env->id_gen;
10537 
10538 	if (is_ptr_cast_function(func_id) &&
10539 	    find_reference_state(env->cur_state, meta.ref_obj.id)) {
10540 		struct bpf_verifier_state *branch;
10541 		struct bpf_reg_state *r0;
10542 
10543 		err = validate_ref_obj(env, &meta.ref_obj);
10544 		if (err)
10545 			return err;
10546 
10547 		/*
10548 		 * In order for a release of any of the original or cast pointers
10549 		 * to invalidate all other pointers, reuse the same reference id for
10550 		 * the cast result.
10551 		 * This reference id can't be used for nullness propagation,
10552 		 * as cast might return NULL for a non-NULL input.
10553 		 * Hence, explore the NULL case as a separate branch.
10554 		 */
10555 		branch = push_stack(env, env->insn_idx + 1, env->insn_idx, false);
10556 		if (IS_ERR(branch))
10557 			return PTR_ERR(branch);
10558 
		/* In the pushed branch R0 is the scalar constant zero. */
10559 		r0 = &branch->frame[branch->curframe]->regs[BPF_REG_0];
10560 		__mark_reg_known_zero(r0);
10561 		r0->type = SCALAR_VALUE;
10562 
		/* In the current state R0 is non-NULL and shares the
		 * reference id of the cast input.
		 */
10563 		regs[BPF_REG_0].type &= ~PTR_MAYBE_NULL;
10564 		regs[BPF_REG_0].id = meta.ref_obj.id;
10565 	} else if (is_acquire_function(func_id, meta.map.ptr)) {
10566 		int id = acquire_reference(env, insn_idx, 0);
10567 
10568 		if (id < 0)
10569 			return id;
10570 
10571 		regs[BPF_REG_0].id = id;
10572 	}
10573 
10574 	if (func_id == BPF_FUNC_dynptr_data)
10575 		regs[BPF_REG_0].parent_id = meta.dynptr.id;
10576 
10577 	err = do_refine_retval_range(env, regs, fn->ret_type, func_id, &meta);
10578 	if (err)
10579 		return err;
10580 
10581 	err = check_map_func_compatibility(env, meta.map.ptr, func_id);
10582 	if (err)
10583 		return err;
10584 
	/* Callchain buffers are requested once per program; without
	 * CONFIG_PERF_EVENTS these helpers are rejected with -ENOTSUPP.
	 */
10585 	if ((func_id == BPF_FUNC_get_stack ||
10586 	     func_id == BPF_FUNC_get_task_stack) &&
10587 	    !env->prog->has_callchain_buf) {
10588 		const char *err_str;
10589 
10590 #ifdef CONFIG_PERF_EVENTS
10591 		err = get_callchain_buffers(sysctl_perf_event_max_stack);
10592 		err_str = "cannot get callchain buffer for func %s#%d\n";
10593 #else
10594 		err = -ENOTSUPP;
10595 		err_str = "func %s#%d not supported without CONFIG_PERF_EVENTS\n";
10596 #endif
10597 		if (err) {
10598 			verbose(env, err_str, func_id_name(func_id), func_id);
10599 			return err;
10600 		}
10601 
10602 		env->prog->has_callchain_buf = true;
10603 	}
10604 
10605 	if (func_id == BPF_FUNC_get_stackid || func_id == BPF_FUNC_get_stack)
10606 		env->prog->call_get_stack = true;
10607 
10608 	if (func_id == BPF_FUNC_get_func_ip) {
10609 		if (check_get_func_ip(env))
10610 			return -ENOTSUPP;
10611 		env->prog->call_get_func_ip = true;
10612 	}
10613 
10614 	if (func_id == BPF_FUNC_tail_call) {
		/* Non-zero curframe: a state continuing at insn_idx + 1 is
		 * pushed, then the current state goes through
		 * prepare_func_exit(). The decrement of env->insn_idx
		 * follows directly.
		 * NOTE(review): presumably the pushed state models the tail
		 * call falling through and the current one models it leaving
		 * the subprogram frame; the decrement presumably offsets an
		 * increment done by the caller - confirm both.
		 */
10615 		if (env->cur_state->curframe) {
10616 			struct bpf_verifier_state *branch;
10617 
10618 			mark_reg_scratched(env, BPF_REG_0);
10619 			branch = push_stack(env, env->insn_idx + 1, env->insn_idx, false);
10620 			if (IS_ERR(branch))
10621 				return PTR_ERR(branch);
10622 			clear_all_pkt_pointers(env);
10623 			mark_reg_unknown(env, regs, BPF_REG_0);
10624 			err = prepare_func_exit(env, &env->insn_idx);
10625 			if (err)
10626 				return err;
10627 			env->insn_idx--;
10628 		} else {
			/* Frame 0: skip the packet pointer invalidation below. */
10629 			changes_data = false;
10630 		}
10631 	}
10632 
10633 	if (changes_data)
10634 		clear_all_pkt_pointers(env);
10635 	return 0;
10636 }
10637 
10638 /* mark_btf_func_reg_size() is used when the reg size is determined by
10639  * the BTF func_proto's return value size and argument.
10640  */
10641 static void __mark_btf_func_reg_size(struct bpf_verifier_env *env, struct bpf_reg_state *regs,
10642 				     u32 regno, size_t reg_size)
10643 {
10644 	struct bpf_reg_state *reg = &regs[regno];
10645 
10646 	if (regno == BPF_REG_0) {
10647 		/* Function return value */
10648 		reg->subreg_def = reg_size == sizeof(u64) ?
10649 			DEF_NOT_SUBREG : env->insn_idx + 1;
10650 	} else if (reg_size == sizeof(u64)) {
10651 		/* Function argument */
10652 		mark_insn_zext(env, reg);
10653 	}
10654 }
10655 
10656 static void mark_btf_func_reg_size(struct bpf_verifier_env *env, u32 regno,
10657 				   size_t reg_size)
10658 {
10659 	return __mark_btf_func_reg_size(env, cur_regs(env), regno, reg_size);
10660 }
10661 
10662 static bool is_kfunc_acquire(struct bpf_kfunc_call_arg_meta *meta)
10663 {
10664 	return meta->kfunc_flags & KF_ACQUIRE;
10665 }
10666 
10667 static bool is_kfunc_release(struct bpf_kfunc_call_arg_meta *meta)
10668 {
10669 	return meta->kfunc_flags & KF_RELEASE;
10670 }
10671 
10672 static bool is_kfunc_destructive(struct bpf_kfunc_call_arg_meta *meta)
10673 {
10674 	return meta->kfunc_flags & KF_DESTRUCTIVE;
10675 }
10676 
10677 static bool is_kfunc_rcu(struct bpf_kfunc_call_arg_meta *meta)
10678 {
10679 	return meta->kfunc_flags & KF_RCU;
10680 }
10681 
10682 static bool is_kfunc_rcu_protected(struct bpf_kfunc_call_arg_meta *meta)
10683 {
10684 	return meta->kfunc_flags & KF_RCU_PROTECTED;
10685 }
10686 
10687 static bool is_kfunc_arg_mem_size(const struct btf *btf,
10688 				  const struct btf_param *arg,
10689 				  const struct bpf_reg_state *reg)
10690 {
10691 	const struct btf_type *t;
10692 
10693 	t = btf_type_skip_modifiers(btf, arg->type, NULL);
10694 	if (!btf_type_is_scalar(t) || reg->type != SCALAR_VALUE)
10695 		return false;
10696 
10697 	return btf_param_match_suffix(btf, arg, "__sz");
10698 }
10699 
10700 static bool is_kfunc_arg_const_mem_size(const struct btf *btf,
10701 					const struct btf_param *arg,
10702 					const struct bpf_reg_state *reg)
10703 {
10704 	const struct btf_type *t;
10705 
10706 	t = btf_type_skip_modifiers(btf, arg->type, NULL);
10707 	if (!btf_type_is_scalar(t) || reg->type != SCALAR_VALUE)
10708 		return false;
10709 
10710 	return btf_param_match_suffix(btf, arg, "__szk");
10711 }
10712 
/* Thin wrapper: reports what btf_param_match_suffix() returns for @arg and
 * the "__k" suffix.
 */
10713 static bool is_kfunc_arg_constant(const struct btf *btf, const struct btf_param *arg)
10714 {
10715 	return btf_param_match_suffix(btf, arg, "__k");
10716 }
10717 
/* Thin wrapper: reports what btf_param_match_suffix() returns for @arg and
 * the "__ign" suffix.
 */
10718 static bool is_kfunc_arg_ignore(const struct btf *btf, const struct btf_param *arg)
10719 {
10720 	return btf_param_match_suffix(btf, arg, "__ign");
10721 }
10722 
/* Thin wrapper: reports what btf_param_match_suffix() returns for @arg and
 * the "__map" suffix.
 */
10723 static bool is_kfunc_arg_map(const struct btf *btf, const struct btf_param *arg)
10724 {
10725 	return btf_param_match_suffix(btf, arg, "__map");
10726 }
10727 
/* Thin wrapper: reports what btf_param_match_suffix() returns for @arg and
 * the "__alloc" suffix.
 */
10728 static bool is_kfunc_arg_alloc_obj(const struct btf *btf, const struct btf_param *arg)
10729 {
10730 	return btf_param_match_suffix(btf, arg, "__alloc");
10731 }
10732 
/* Thin wrapper: reports what btf_param_match_suffix() returns for @arg and
 * the "__uninit" suffix.
 */
10733 static bool is_kfunc_arg_uninit(const struct btf *btf, const struct btf_param *arg)
10734 {
10735 	return btf_param_match_suffix(btf, arg, "__uninit");
10736 }
10737 
/* Thin wrapper: reports what btf_param_match_suffix() returns for @arg and
 * the "__refcounted_kptr" suffix.
 */
10738 static bool is_kfunc_arg_refcounted_kptr(const struct btf *btf, const struct btf_param *arg)
10739 {
10740 	return btf_param_match_suffix(btf, arg, "__refcounted_kptr");
10741 }
10742 
/* Thin wrapper: reports what btf_param_match_suffix() returns for @arg and
 * the "__nullable" suffix.
 */
10743 static bool is_kfunc_arg_nullable(const struct btf *btf, const struct btf_param *arg)
10744 {
10745 	return btf_param_match_suffix(btf, arg, "__nullable");
10746 }
10747 
/* Thin wrapper: reports what btf_param_match_suffix() returns for @arg and
 * the "__nonown_allowed" suffix.
 */
10748 static bool is_kfunc_arg_nonown_allowed(const struct btf *btf, const struct btf_param *arg)
10749 {
10750 	return btf_param_match_suffix(btf, arg, "__nonown_allowed");
10751 }
10752 
/* Thin wrapper: reports what btf_param_match_suffix() returns for @arg and
 * the "__str" suffix.
 */
10753 static bool is_kfunc_arg_const_str(const struct btf *btf, const struct btf_param *arg)
10754 {
10755 	return btf_param_match_suffix(btf, arg, "__str");
10756 }
10757 
/* Thin wrapper: reports what btf_param_match_suffix() returns for @arg and
 * the "__irq_flag" suffix.
 */
10758 static bool is_kfunc_arg_irq_flag(const struct btf *btf, const struct btf_param *arg)
10759 {
10760 	return btf_param_match_suffix(btf, arg, "__irq_flag");
10761 }
10762 
10763 static bool is_kfunc_arg_scalar_with_name(const struct btf *btf,
10764 					  const struct btf_param *arg,
10765 					  const char *name)
10766 {
10767 	int len, target_len = strlen(name);
10768 	const char *param_name;
10769 
10770 	param_name = btf_name_by_offset(btf, arg->name_off);
10771 	if (str_is_empty(param_name))
10772 		return false;
10773 	len = strlen(param_name);
10774 	if (len != target_len)
10775 		return false;
10776 	if (strcmp(param_name, name))
10777 		return false;
10778 
10779 	return true;
10780 }
10781 
/* Indices into kf_arg_btf_ids[]. The enumerators must stay in the same
 * order as the BTF_ID() entries of that list; the struct each index is
 * paired with is noted on the right.
 */
enum {
	KF_ARG_DYNPTR_ID = 0,		/* struct bpf_dynptr */
	KF_ARG_LIST_HEAD_ID,		/* struct bpf_list_head */
	KF_ARG_LIST_NODE_ID,		/* struct bpf_list_node */
	KF_ARG_RB_ROOT_ID,		/* struct bpf_rb_root */
	KF_ARG_RB_NODE_ID,		/* struct bpf_rb_node */
	KF_ARG_WORKQUEUE_ID,		/* struct bpf_wq */
	KF_ARG_RES_SPIN_LOCK_ID,	/* struct bpf_res_spin_lock */
	KF_ARG_TASK_WORK_ID,		/* struct bpf_task_work */
	KF_ARG_PROG_AUX_ID,		/* struct bpf_prog_aux */
	KF_ARG_TIMER_ID,		/* struct bpf_timer */
};
10794 
/* BTF ids of the structs recognised as special kfunc pointer arguments.
 * The list is indexed with the KF_ARG_*_ID enumerators, so the entries
 * below must stay in the same order as that enum.
 */
10795 BTF_ID_LIST(kf_arg_btf_ids)
10796 BTF_ID(struct, bpf_dynptr)
10797 BTF_ID(struct, bpf_list_head)
10798 BTF_ID(struct, bpf_list_node)
10799 BTF_ID(struct, bpf_rb_root)
10800 BTF_ID(struct, bpf_rb_node)
10801 BTF_ID(struct, bpf_wq)
10802 BTF_ID(struct, bpf_res_spin_lock)
10803 BTF_ID(struct, bpf_task_work)
10804 BTF_ID(struct, bpf_prog_aux)
10805 BTF_ID(struct, bpf_timer)
10806 
10807 static bool __is_kfunc_ptr_arg_type(const struct btf *btf,
10808 				    const struct btf_param *arg, int type)
10809 {
10810 	const struct btf_type *t;
10811 	u32 res_id;
10812 
10813 	t = btf_type_skip_modifiers(btf, arg->type, NULL);
10814 	if (!t)
10815 		return false;
10816 	if (!btf_type_is_ptr(t))
10817 		return false;
10818 	t = btf_type_skip_modifiers(btf, t->type, &res_id);
10819 	if (!t)
10820 		return false;
10821 	return btf_types_are_same(btf, res_id, btf_vmlinux, kf_arg_btf_ids[type]);
10822 }
10823 
/* True when __is_kfunc_ptr_arg_type() matches @arg against the
 * KF_ARG_DYNPTR_ID entry of kf_arg_btf_ids (struct bpf_dynptr).
 */
10824 static bool is_kfunc_arg_dynptr(const struct btf *btf, const struct btf_param *arg)
10825 {
10826 	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_DYNPTR_ID);
10827 }
10828 
/* True when __is_kfunc_ptr_arg_type() matches @arg against the
 * KF_ARG_LIST_HEAD_ID entry of kf_arg_btf_ids (struct bpf_list_head).
 */
10829 static bool is_kfunc_arg_list_head(const struct btf *btf, const struct btf_param *arg)
10830 {
10831 	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_LIST_HEAD_ID);
10832 }
10833 
/* True when __is_kfunc_ptr_arg_type() matches @arg against the
 * KF_ARG_LIST_NODE_ID entry of kf_arg_btf_ids (struct bpf_list_node).
 */
10834 static bool is_kfunc_arg_list_node(const struct btf *btf, const struct btf_param *arg)
10835 {
10836 	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_LIST_NODE_ID);
10837 }
10838 
/* True when __is_kfunc_ptr_arg_type() matches @arg against the
 * KF_ARG_RB_ROOT_ID entry of kf_arg_btf_ids (struct bpf_rb_root).
 */
10839 static bool is_kfunc_arg_rbtree_root(const struct btf *btf, const struct btf_param *arg)
10840 {
10841 	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_RB_ROOT_ID);
10842 }
10843 
/* True when __is_kfunc_ptr_arg_type() matches @arg against the
 * KF_ARG_RB_NODE_ID entry of kf_arg_btf_ids (struct bpf_rb_node).
 */
10844 static bool is_kfunc_arg_rbtree_node(const struct btf *btf, const struct btf_param *arg)
10845 {
10846 	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_RB_NODE_ID);
10847 }
10848 
/* True when __is_kfunc_ptr_arg_type() matches @arg against the
 * KF_ARG_TIMER_ID entry of kf_arg_btf_ids (struct bpf_timer).
 */
10849 static bool is_kfunc_arg_timer(const struct btf *btf, const struct btf_param *arg)
10850 {
10851 	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_TIMER_ID);
10852 }
10853 
/* True when __is_kfunc_ptr_arg_type() matches @arg against the
 * KF_ARG_WORKQUEUE_ID entry of kf_arg_btf_ids (struct bpf_wq).
 */
10854 static bool is_kfunc_arg_wq(const struct btf *btf, const struct btf_param *arg)
10855 {
10856 	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_WORKQUEUE_ID);
10857 }
10858 
/* True when __is_kfunc_ptr_arg_type() matches @arg against the
 * KF_ARG_TASK_WORK_ID entry of kf_arg_btf_ids (struct bpf_task_work).
 */
10859 static bool is_kfunc_arg_task_work(const struct btf *btf, const struct btf_param *arg)
10860 {
10861 	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_TASK_WORK_ID);
10862 }
10863 
/* True when __is_kfunc_ptr_arg_type() matches @arg against the
 * KF_ARG_RES_SPIN_LOCK_ID entry of kf_arg_btf_ids (struct bpf_res_spin_lock).
 */
10864 static bool is_kfunc_arg_res_spin_lock(const struct btf *btf, const struct btf_param *arg)
10865 {
10866 	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_RES_SPIN_LOCK_ID);
10867 }
10868 
10869 static bool is_rbtree_node_type(const struct btf_type *t)
10870 {
10871 	return t == btf_type_by_id(btf_vmlinux, kf_arg_btf_ids[KF_ARG_RB_NODE_ID]);
10872 }
10873 
10874 static bool is_list_node_type(const struct btf_type *t)
10875 {
10876 	return t == btf_type_by_id(btf_vmlinux, kf_arg_btf_ids[KF_ARG_LIST_NODE_ID]);
10877 }
10878 
10879 static bool is_kfunc_arg_callback(struct bpf_verifier_env *env, const struct btf *btf,
10880 				  const struct btf_param *arg)
10881 {
10882 	const struct btf_type *t;
10883 
10884 	t = btf_type_resolve_func_ptr(btf, arg->type, NULL);
10885 	if (!t)
10886 		return false;
10887 
10888 	return true;
10889 }
10890 
/* True when __is_kfunc_ptr_arg_type() matches @arg against the
 * KF_ARG_PROG_AUX_ID entry of kf_arg_btf_ids (struct bpf_prog_aux).
 */
10891 static bool is_kfunc_arg_prog_aux(const struct btf *btf, const struct btf_param *arg)
10892 {
10893 	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_PROG_AUX_ID);
10894 }
10895 
10896 /*
10897  * A kfunc with KF_IMPLICIT_ARGS has two prototypes in BTF:
10898  *   - the _impl prototype with full arg list (meta->func_proto)
10899  *   - the BPF API prototype w/o implicit args (func->type in BTF)
10900  * To determine whether an argument is implicit, we compare its position
10901  * against the number of arguments in the prototype w/o implicit args.
10902  */
10903 static bool is_kfunc_arg_implicit(const struct bpf_kfunc_call_arg_meta *meta, u32 arg_idx)
10904 {
10905 	const struct btf_type *func, *func_proto;
10906 	u32 argn;
10907 
10908 	if (!(meta->kfunc_flags & KF_IMPLICIT_ARGS))
10909 		return false;
10910 
10911 	func = btf_type_by_id(meta->btf, meta->func_id);
10912 	func_proto = btf_type_by_id(meta->btf, func->type);
10913 	argn = btf_type_vlen(func_proto);
10914 
10915 	return argn <= arg_idx;
10916 }
10917 
10918 /* Returns true if struct is composed of scalars, 4 levels of nesting allowed */
10919 static bool __btf_type_is_scalar_struct(struct bpf_verifier_env *env,
10920 					const struct btf *btf,
10921 					const struct btf_type *t, int rec)
10922 {
10923 	const struct btf_type *member_type;
10924 	const struct btf_member *member;
10925 	u32 i;
10926 
10927 	if (!btf_type_is_struct(t))
10928 		return false;
10929 
10930 	for_each_member(i, t, member) {
10931 		const struct btf_array *array;
10932 
10933 		member_type = btf_type_skip_modifiers(btf, member->type, NULL);
10934 		if (btf_type_is_struct(member_type)) {
10935 			if (rec >= 3) {
10936 				verbose(env, "max struct nesting depth exceeded\n");
10937 				return false;
10938 			}
10939 			if (!__btf_type_is_scalar_struct(env, btf, member_type, rec + 1))
10940 				return false;
10941 			continue;
10942 		}
10943 		if (btf_type_is_array(member_type)) {
10944 			array = btf_array(member_type);
10945 			if (!array->nelems)
10946 				return false;
10947 			member_type = btf_type_skip_modifiers(btf, array->type, NULL);
10948 			if (!btf_type_is_scalar(member_type))
10949 				return false;
10950 			continue;
10951 		}
10952 		if (!btf_type_is_scalar(member_type))
10953 			return false;
10954 	}
10955 	return true;
10956 }
10957 
/* Kinds of kfunc pointer arguments.
 * NOTE(review): the code that produces and consumes these values is outside
 * this part of the file; presumably each kfunc pointer argument is
 * classified into one of them - confirm against the users.
 */
enum kfunc_ptr_arg_type {
	KF_ARG_PTR_TO_CTX = 0,
	/* Allocated object */
	KF_ARG_PTR_TO_ALLOC_BTF_ID,
	/* Refcounted local kptr */
	KF_ARG_PTR_TO_REFCOUNTED_KPTR,
	KF_ARG_PTR_TO_DYNPTR,
	KF_ARG_PTR_TO_ITER,
	KF_ARG_PTR_TO_LIST_HEAD,
	KF_ARG_PTR_TO_LIST_NODE,
	/* Also covers reg2btf_ids conversions */
	KF_ARG_PTR_TO_BTF_ID,
	KF_ARG_PTR_TO_MEM,
	/* Size derived from next argument, skip it */
	KF_ARG_PTR_TO_MEM_SIZE,
	KF_ARG_PTR_TO_CALLBACK,
	KF_ARG_PTR_TO_RB_ROOT,
	KF_ARG_PTR_TO_RB_NODE,
	KF_ARG_PTR_TO_NULL,
	KF_ARG_PTR_TO_CONST_STR,
	KF_ARG_PTR_TO_MAP,
	KF_ARG_PTR_TO_TIMER,
	KF_ARG_PTR_TO_WORKQUEUE,
	KF_ARG_PTR_TO_IRQ_FLAG,
	KF_ARG_PTR_TO_RES_SPIN_LOCK,
	KF_ARG_PTR_TO_TASK_WORK,
};
10981 
/* Indices into special_kfunc_list[]. Every enumerator corresponds to the
 * entry at the same position in that list, so the two must be kept in the
 * same order. Entries whose list slot is config-dependent still have an
 * unconditional enumerator here.
 */
enum special_kfunc_type {
	/* object allocation, drop and refcounting */
	KF_bpf_obj_new_impl = 0,
	KF_bpf_obj_new,
	KF_bpf_obj_drop_impl,
	KF_bpf_obj_drop,
	KF_bpf_refcount_acquire_impl,
	KF_bpf_refcount_acquire,

	/* linked list */
	KF_bpf_list_push_front_impl,
	KF_bpf_list_push_front,
	KF_bpf_list_push_back_impl,
	KF_bpf_list_push_back,
	KF_bpf_list_add,
	KF_bpf_list_pop_front,
	KF_bpf_list_pop_back,
	KF_bpf_list_del,
	KF_bpf_list_front,
	KF_bpf_list_back,
	KF_bpf_list_is_first,
	KF_bpf_list_is_last,
	KF_bpf_list_empty,

	/* casts and RCU read-side sections */
	KF_bpf_cast_to_kern_ctx,
	KF_bpf_rdonly_cast,
	KF_bpf_rcu_read_lock,
	KF_bpf_rcu_read_unlock,

	/* rbtree */
	KF_bpf_rbtree_remove,
	KF_bpf_rbtree_add_impl,
	KF_bpf_rbtree_add,
	KF_bpf_rbtree_first,
	KF_bpf_rbtree_root,
	KF_bpf_rbtree_left,
	KF_bpf_rbtree_right,

	/* list slots for these four depend on CONFIG_NET */
	KF_bpf_dynptr_from_skb,
	KF_bpf_dynptr_from_xdp,
	KF_bpf_dynptr_from_skb_meta,
	KF_bpf_xdp_pull_data,

	/* dynptr slices and clone */
	KF_bpf_dynptr_slice,
	KF_bpf_dynptr_slice_rdwr,
	KF_bpf_dynptr_clone,

	/* per-cpu objects */
	KF_bpf_percpu_obj_new_impl,
	KF_bpf_percpu_obj_new,
	KF_bpf_percpu_obj_drop_impl,
	KF_bpf_percpu_obj_drop,

	KF_bpf_throw,
	KF_bpf_wq_set_callback,
	KF_bpf_preempt_disable,
	KF_bpf_preempt_enable,
	/* list slot depends on CONFIG_CGROUPS */
	KF_bpf_iter_css_task_new,
	/* list slot depends on CONFIG_BPF_EVENTS */
	KF_bpf_session_cookie,
	KF_bpf_get_kmem_cache,
	KF_bpf_local_irq_save,
	KF_bpf_local_irq_restore,

	/* numeric iterator */
	KF_bpf_iter_num_new,
	KF_bpf_iter_num_next,
	KF_bpf_iter_num_destroy,

	/* list slots for these two depend on CONFIG_BPF_LSM */
	KF_bpf_set_dentry_xattr,
	KF_bpf_remove_dentry_xattr,

	/* resilient spin lock */
	KF_bpf_res_spin_lock,
	KF_bpf_res_spin_unlock,
	KF_bpf_res_spin_lock_irqsave,
	KF_bpf_res_spin_unlock_irqrestore,

	KF_bpf_dynptr_from_file,
	KF_bpf_dynptr_file_discard,
	KF___bpf_trap,
	KF_bpf_task_work_schedule_signal,
	KF_bpf_task_work_schedule_resume,

	/* arena */
	KF_bpf_arena_alloc_pages,
	KF_bpf_arena_free_pages,
	KF_bpf_arena_reserve_pages,

	/* list slot depends on CONFIG_BPF_EVENTS */
	KF_bpf_session_is_return,

	/* streams */
	KF_bpf_stream_vprintk,
	KF_bpf_stream_print_stack,
};
11054 
/* BTF ids of the kfuncs the verifier treats specially. The list is indexed
 * with enum special_kfunc_type, so every entry must sit at the position of
 * its KF_* enumerator. Where a kfunc depends on a config option, the #else
 * branch supplies the same number of BTF_ID_UNUSED entries as the #ifdef
 * branch has BTF_ID() entries.
 * NOTE(review): BTF_ID_UNUSED presumably reserves one list slot so that the
 * following indices do not shift - confirm in the BTF_ID macro definitions.
 */
11055 BTF_ID_LIST(special_kfunc_list)
11056 BTF_ID(func, bpf_obj_new_impl)
11057 BTF_ID(func, bpf_obj_new)
11058 BTF_ID(func, bpf_obj_drop_impl)
11059 BTF_ID(func, bpf_obj_drop)
11060 BTF_ID(func, bpf_refcount_acquire_impl)
11061 BTF_ID(func, bpf_refcount_acquire)
11062 BTF_ID(func, bpf_list_push_front_impl)
11063 BTF_ID(func, bpf_list_push_front)
11064 BTF_ID(func, bpf_list_push_back_impl)
11065 BTF_ID(func, bpf_list_push_back)
11066 BTF_ID(func, bpf_list_add)
11067 BTF_ID(func, bpf_list_pop_front)
11068 BTF_ID(func, bpf_list_pop_back)
11069 BTF_ID(func, bpf_list_del)
11070 BTF_ID(func, bpf_list_front)
11071 BTF_ID(func, bpf_list_back)
11072 BTF_ID(func, bpf_list_is_first)
11073 BTF_ID(func, bpf_list_is_last)
11074 BTF_ID(func, bpf_list_empty)
11075 BTF_ID(func, bpf_cast_to_kern_ctx)
11076 BTF_ID(func, bpf_rdonly_cast)
11077 BTF_ID(func, bpf_rcu_read_lock)
11078 BTF_ID(func, bpf_rcu_read_unlock)
11079 BTF_ID(func, bpf_rbtree_remove)
11080 BTF_ID(func, bpf_rbtree_add_impl)
11081 BTF_ID(func, bpf_rbtree_add)
11082 BTF_ID(func, bpf_rbtree_first)
11083 BTF_ID(func, bpf_rbtree_root)
11084 BTF_ID(func, bpf_rbtree_left)
11085 BTF_ID(func, bpf_rbtree_right)
/* KF_bpf_dynptr_from_skb .. KF_bpf_xdp_pull_data: four slots */
11086 #ifdef CONFIG_NET
11087 BTF_ID(func, bpf_dynptr_from_skb)
11088 BTF_ID(func, bpf_dynptr_from_xdp)
11089 BTF_ID(func, bpf_dynptr_from_skb_meta)
11090 BTF_ID(func, bpf_xdp_pull_data)
11091 #else
11092 BTF_ID_UNUSED
11093 BTF_ID_UNUSED
11094 BTF_ID_UNUSED
11095 BTF_ID_UNUSED
11096 #endif
11097 BTF_ID(func, bpf_dynptr_slice)
11098 BTF_ID(func, bpf_dynptr_slice_rdwr)
11099 BTF_ID(func, bpf_dynptr_clone)
11100 BTF_ID(func, bpf_percpu_obj_new_impl)
11101 BTF_ID(func, bpf_percpu_obj_new)
11102 BTF_ID(func, bpf_percpu_obj_drop_impl)
11103 BTF_ID(func, bpf_percpu_obj_drop)
11104 BTF_ID(func, bpf_throw)
11105 BTF_ID(func, bpf_wq_set_callback)
11106 BTF_ID(func, bpf_preempt_disable)
11107 BTF_ID(func, bpf_preempt_enable)
/* KF_bpf_iter_css_task_new: one slot */
11108 #ifdef CONFIG_CGROUPS
11109 BTF_ID(func, bpf_iter_css_task_new)
11110 #else
11111 BTF_ID_UNUSED
11112 #endif
/* KF_bpf_session_cookie: one slot */
11113 #ifdef CONFIG_BPF_EVENTS
11114 BTF_ID(func, bpf_session_cookie)
11115 #else
11116 BTF_ID_UNUSED
11117 #endif
11118 BTF_ID(func, bpf_get_kmem_cache)
11119 BTF_ID(func, bpf_local_irq_save)
11120 BTF_ID(func, bpf_local_irq_restore)
11121 BTF_ID(func, bpf_iter_num_new)
11122 BTF_ID(func, bpf_iter_num_next)
11123 BTF_ID(func, bpf_iter_num_destroy)
/* KF_bpf_set_dentry_xattr, KF_bpf_remove_dentry_xattr: two slots */
11124 #ifdef CONFIG_BPF_LSM
11125 BTF_ID(func, bpf_set_dentry_xattr)
11126 BTF_ID(func, bpf_remove_dentry_xattr)
11127 #else
11128 BTF_ID_UNUSED
11129 BTF_ID_UNUSED
11130 #endif
11131 BTF_ID(func, bpf_res_spin_lock)
11132 BTF_ID(func, bpf_res_spin_unlock)
11133 BTF_ID(func, bpf_res_spin_lock_irqsave)
11134 BTF_ID(func, bpf_res_spin_unlock_irqrestore)
11135 BTF_ID(func, bpf_dynptr_from_file)
11136 BTF_ID(func, bpf_dynptr_file_discard)
11137 BTF_ID(func, __bpf_trap)
11138 BTF_ID(func, bpf_task_work_schedule_signal)
11139 BTF_ID(func, bpf_task_work_schedule_resume)
11140 BTF_ID(func, bpf_arena_alloc_pages)
11141 BTF_ID(func, bpf_arena_free_pages)
11142 BTF_ID(func, bpf_arena_reserve_pages)
/* KF_bpf_session_is_return: one slot */
11143 #ifdef CONFIG_BPF_EVENTS
11144 BTF_ID(func, bpf_session_is_return)
11145 #else
11146 BTF_ID_UNUSED
11147 #endif
11148 BTF_ID(func, bpf_stream_vprintk)
11149 BTF_ID(func, bpf_stream_print_stack)
11150 
11151 static bool is_bpf_obj_new_kfunc(u32 func_id)
11152 {
11153 	return func_id == special_kfunc_list[KF_bpf_obj_new] ||
11154 	       func_id == special_kfunc_list[KF_bpf_obj_new_impl];
11155 }
11156 
11157 static bool is_bpf_percpu_obj_new_kfunc(u32 func_id)
11158 {
11159 	return func_id == special_kfunc_list[KF_bpf_percpu_obj_new] ||
11160 	       func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl];
11161 }
11162 
11163 static bool is_bpf_obj_drop_kfunc(u32 func_id)
11164 {
11165 	return func_id == special_kfunc_list[KF_bpf_obj_drop] ||
11166 	       func_id == special_kfunc_list[KF_bpf_obj_drop_impl];
11167 }
11168 
11169 static bool is_bpf_percpu_obj_drop_kfunc(u32 func_id)
11170 {
11171 	return func_id == special_kfunc_list[KF_bpf_percpu_obj_drop] ||
11172 	       func_id == special_kfunc_list[KF_bpf_percpu_obj_drop_impl];
11173 }
11174 
11175 static bool is_bpf_refcount_acquire_kfunc(u32 func_id)
11176 {
11177 	return func_id == special_kfunc_list[KF_bpf_refcount_acquire] ||
11178 	       func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl];
11179 }
11180 
11181 static bool is_bpf_list_push_kfunc(u32 func_id)
11182 {
11183 	return func_id == special_kfunc_list[KF_bpf_list_push_front] ||
11184 	       func_id == special_kfunc_list[KF_bpf_list_push_front_impl] ||
11185 	       func_id == special_kfunc_list[KF_bpf_list_push_back] ||
11186 	       func_id == special_kfunc_list[KF_bpf_list_push_back_impl] ||
11187 	       func_id == special_kfunc_list[KF_bpf_list_add];
11188 }
11189 
11190 static bool is_bpf_rbtree_add_kfunc(u32 func_id)
11191 {
11192 	return func_id == special_kfunc_list[KF_bpf_rbtree_add] ||
11193 	       func_id == special_kfunc_list[KF_bpf_rbtree_add_impl];
11194 }
11195 
11196 static bool is_task_work_add_kfunc(u32 func_id)
11197 {
11198 	return func_id == special_kfunc_list[KF_bpf_task_work_schedule_signal] ||
11199 	       func_id == special_kfunc_list[KF_bpf_task_work_schedule_resume];
11200 }
11201 
11202 static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta)
11203 {
11204 	if (is_bpf_refcount_acquire_kfunc(meta->func_id) && meta->arg_owning_ref)
11205 		return false;
11206 
11207 	return meta->kfunc_flags & KF_RET_NULL;
11208 }
11209 
11210 static bool is_kfunc_bpf_rcu_read_lock(struct bpf_kfunc_call_arg_meta *meta)
11211 {
11212 	return meta->func_id == special_kfunc_list[KF_bpf_rcu_read_lock];
11213 }
11214 
11215 static bool is_kfunc_bpf_rcu_read_unlock(struct bpf_kfunc_call_arg_meta *meta)
11216 {
11217 	return meta->func_id == special_kfunc_list[KF_bpf_rcu_read_unlock];
11218 }
11219 
11220 static bool is_kfunc_bpf_preempt_disable(struct bpf_kfunc_call_arg_meta *meta)
11221 {
11222 	return meta->func_id == special_kfunc_list[KF_bpf_preempt_disable];
11223 }
11224 
11225 static bool is_kfunc_bpf_preempt_enable(struct bpf_kfunc_call_arg_meta *meta)
11226 {
11227 	return meta->func_id == special_kfunc_list[KF_bpf_preempt_enable];
11228 }
11229 
11230 bool bpf_is_kfunc_pkt_changing(struct bpf_kfunc_call_arg_meta *meta)
11231 {
11232 	return meta->func_id == special_kfunc_list[KF_bpf_xdp_pull_data];
11233 }
11234 
11235 static enum kfunc_ptr_arg_type
11236 get_kfunc_ptr_arg_type(struct bpf_verifier_env *env, struct bpf_func_state *caller,
11237 		       struct bpf_reg_state *regs, struct bpf_kfunc_call_arg_meta *meta,
11238 		       const struct btf_type *t, const struct btf_type *ref_t,
11239 		       const char *ref_tname, const struct btf_param *args,
11240 		       int arg, int nargs, argno_t argno, struct bpf_reg_state *reg)
11241 {
11242 	bool arg_mem_size = false;
11243 
11244 	if (meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx] ||
11245 	    meta->func_id == special_kfunc_list[KF_bpf_session_is_return] ||
11246 	    meta->func_id == special_kfunc_list[KF_bpf_session_cookie])
11247 		return KF_ARG_PTR_TO_CTX;
11248 
11249 	if (arg + 1 < nargs &&
11250 	    (is_kfunc_arg_mem_size(meta->btf, &args[arg + 1], get_func_arg_reg(caller, regs, arg + 1)) ||
11251 	     is_kfunc_arg_const_mem_size(meta->btf, &args[arg + 1], get_func_arg_reg(caller, regs, arg + 1))))
11252 		arg_mem_size = true;
11253 
11254 	/* In this function, we verify the kfunc's BTF as per the argument type,
11255 	 * leaving the rest of the verification with respect to the register
11256 	 * type to our caller. When a set of conditions hold in the BTF type of
11257 	 * arguments, we resolve it to a known kfunc_ptr_arg_type.
11258 	 */
11259 	if (btf_is_prog_ctx_type(&env->log, meta->btf, t, resolve_prog_type(env->prog), arg))
11260 		return KF_ARG_PTR_TO_CTX;
11261 
11262 	if (is_kfunc_arg_nullable(meta->btf, &args[arg]) && bpf_register_is_null(reg) &&
11263 	    !arg_mem_size)
11264 		return KF_ARG_PTR_TO_NULL;
11265 
11266 	if (is_kfunc_arg_alloc_obj(meta->btf, &args[arg]))
11267 		return KF_ARG_PTR_TO_ALLOC_BTF_ID;
11268 
11269 	if (is_kfunc_arg_refcounted_kptr(meta->btf, &args[arg]))
11270 		return KF_ARG_PTR_TO_REFCOUNTED_KPTR;
11271 
11272 	if (is_kfunc_arg_dynptr(meta->btf, &args[arg]))
11273 		return KF_ARG_PTR_TO_DYNPTR;
11274 
11275 	if (is_kfunc_arg_iter(meta, arg, &args[arg]))
11276 		return KF_ARG_PTR_TO_ITER;
11277 
11278 	if (is_kfunc_arg_list_head(meta->btf, &args[arg]))
11279 		return KF_ARG_PTR_TO_LIST_HEAD;
11280 
11281 	if (is_kfunc_arg_list_node(meta->btf, &args[arg]))
11282 		return KF_ARG_PTR_TO_LIST_NODE;
11283 
11284 	if (is_kfunc_arg_rbtree_root(meta->btf, &args[arg]))
11285 		return KF_ARG_PTR_TO_RB_ROOT;
11286 
11287 	if (is_kfunc_arg_rbtree_node(meta->btf, &args[arg]))
11288 		return KF_ARG_PTR_TO_RB_NODE;
11289 
11290 	if (is_kfunc_arg_const_str(meta->btf, &args[arg]))
11291 		return KF_ARG_PTR_TO_CONST_STR;
11292 
11293 	if (is_kfunc_arg_map(meta->btf, &args[arg]))
11294 		return KF_ARG_PTR_TO_MAP;
11295 
11296 	if (is_kfunc_arg_wq(meta->btf, &args[arg]))
11297 		return KF_ARG_PTR_TO_WORKQUEUE;
11298 
11299 	if (is_kfunc_arg_timer(meta->btf, &args[arg]))
11300 		return KF_ARG_PTR_TO_TIMER;
11301 
11302 	if (is_kfunc_arg_task_work(meta->btf, &args[arg]))
11303 		return KF_ARG_PTR_TO_TASK_WORK;
11304 
11305 	if (is_kfunc_arg_irq_flag(meta->btf, &args[arg]))
11306 		return KF_ARG_PTR_TO_IRQ_FLAG;
11307 
11308 	if (is_kfunc_arg_res_spin_lock(meta->btf, &args[arg]))
11309 		return KF_ARG_PTR_TO_RES_SPIN_LOCK;
11310 
11311 	if ((base_type(reg->type) == PTR_TO_BTF_ID || reg2btf_ids[base_type(reg->type)])) {
11312 		if (!btf_type_is_struct(ref_t)) {
11313 			verbose(env, "kernel function %s %s pointer type %s %s is not supported\n",
11314 				meta->func_name, reg_arg_name(env, argno),
11315 				btf_type_str(ref_t), ref_tname);
11316 			return -EINVAL;
11317 		}
11318 		return KF_ARG_PTR_TO_BTF_ID;
11319 	}
11320 
11321 	if (is_kfunc_arg_callback(env, meta->btf, &args[arg]))
11322 		return KF_ARG_PTR_TO_CALLBACK;
11323 
11324 	/* This is the catch all argument type of register types supported by
11325 	 * check_helper_mem_access. However, we only allow when argument type is
11326 	 * pointer to scalar, or struct composed (recursively) of scalars. When
11327 	 * arg_mem_size is true, the pointer can be void *.
11328 	 */
11329 	if (!btf_type_is_scalar(ref_t) && !__btf_type_is_scalar_struct(env, meta->btf, ref_t, 0) &&
11330 	    (arg_mem_size ? !btf_type_is_void(ref_t) : 1)) {
11331 		verbose(env, "%s pointer type %s %s must point to %sscalar, or struct with scalar\n",
11332 			reg_arg_name(env, argno),
11333 			btf_type_str(ref_t), ref_tname, arg_mem_size ? "void, " : "");
11334 		return -EINVAL;
11335 	}
11336 	return arg_mem_size ? KF_ARG_PTR_TO_MEM_SIZE : KF_ARG_PTR_TO_MEM;
11337 }
11338 
11339 static int process_kf_arg_ptr_to_btf_id(struct bpf_verifier_env *env,
11340 					struct bpf_reg_state *reg,
11341 					const struct btf_type *ref_t,
11342 					const char *ref_tname, u32 ref_id,
11343 					struct bpf_kfunc_call_arg_meta *meta,
11344 					int arg, argno_t argno)
11345 {
11346 	const struct btf_type *reg_ref_t;
11347 	bool strict_type_match = false;
11348 	const struct btf *reg_btf;
11349 	const char *reg_ref_tname;
11350 	bool taking_projection;
11351 	bool struct_same;
11352 	u32 reg_ref_id;
11353 
11354 	if (base_type(reg->type) == PTR_TO_BTF_ID) {
11355 		reg_btf = reg->btf;
11356 		reg_ref_id = reg->btf_id;
11357 	} else {
11358 		reg_btf = btf_vmlinux;
11359 		reg_ref_id = *reg2btf_ids[base_type(reg->type)];
11360 	}
11361 
11362 	/* Enforce strict type matching for calls to kfuncs that are acquiring
11363 	 * or releasing a reference, or are no-cast aliases. We do _not_
11364 	 * enforce strict matching for kfuncs by default,
11365 	 * as we want to enable BPF programs to pass types that are bitwise
11366 	 * equivalent without forcing them to explicitly cast with something
11367 	 * like bpf_cast_to_kern_ctx().
11368 	 *
11369 	 * For example, say we had a type like the following:
11370 	 *
11371 	 * struct bpf_cpumask {
11372 	 *	cpumask_t cpumask;
11373 	 *	refcount_t usage;
11374 	 * };
11375 	 *
11376 	 * Note that as specified in <linux/cpumask.h>, cpumask_t is typedef'ed
11377 	 * to a struct cpumask, so it would be safe to pass a struct
11378 	 * bpf_cpumask * to a kfunc expecting a struct cpumask *.
11379 	 *
11380 	 * The philosophy here is similar to how we allow scalars of different
11381 	 * types to be passed to kfuncs as long as the size is the same. The
11382 	 * only difference here is that we're simply allowing
11383 	 * btf_struct_ids_match() to walk the struct at the 0th offset, and
11384 	 * resolve types.
11385 	 */
11386 	if ((is_kfunc_release(meta) && reg_is_referenced(env, reg)) ||
11387 	    btf_type_ids_nocast_alias(&env->log, reg_btf, reg_ref_id, meta->btf, ref_id))
11388 		strict_type_match = true;
11389 
11390 	WARN_ON_ONCE(is_kfunc_release(meta) && !tnum_is_const(reg->var_off));
11391 
11392 	reg_ref_t = btf_type_skip_modifiers(reg_btf, reg_ref_id, &reg_ref_id);
11393 	reg_ref_tname = btf_name_by_offset(reg_btf, reg_ref_t->name_off);
11394 	struct_same = btf_struct_ids_match(&env->log, reg_btf, reg_ref_id, reg->var_off.value,
11395 					   meta->btf, ref_id, strict_type_match);
11396 	/* If kfunc is accepting a projection type (ie. __sk_buff), it cannot
11397 	 * actually use it -- it must cast to the underlying type. So we allow
11398 	 * caller to pass in the underlying type.
11399 	 */
11400 	taking_projection = btf_is_projection_of(ref_tname, reg_ref_tname);
11401 	if (!taking_projection && !struct_same) {
11402 		verbose(env, "kernel function %s %s expected pointer to %s %s but %s has a pointer to %s %s\n",
11403 			meta->func_name, reg_arg_name(env, argno),
11404 			btf_type_str(ref_t), ref_tname, reg_arg_name(env, argno),
11405 			btf_type_str(reg_ref_t), reg_ref_tname);
11406 		return -EINVAL;
11407 	}
11408 	return 0;
11409 }
11410 
11411 static int process_irq_flag(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno,
11412 			     struct bpf_kfunc_call_arg_meta *meta)
11413 {
11414 	int err, spi, kfunc_class = IRQ_NATIVE_KFUNC;
11415 	bool irq_save;
11416 
11417 	if (meta->func_id == special_kfunc_list[KF_bpf_local_irq_save] ||
11418 	    meta->func_id == special_kfunc_list[KF_bpf_res_spin_lock_irqsave]) {
11419 		irq_save = true;
11420 		if (meta->func_id == special_kfunc_list[KF_bpf_res_spin_lock_irqsave])
11421 			kfunc_class = IRQ_LOCK_KFUNC;
11422 	} else if (meta->func_id == special_kfunc_list[KF_bpf_local_irq_restore] ||
11423 		   meta->func_id == special_kfunc_list[KF_bpf_res_spin_unlock_irqrestore]) {
11424 		irq_save = false;
11425 		if (meta->func_id == special_kfunc_list[KF_bpf_res_spin_unlock_irqrestore])
11426 			kfunc_class = IRQ_LOCK_KFUNC;
11427 	} else {
11428 		verifier_bug(env, "unknown irq flags kfunc");
11429 		return -EFAULT;
11430 	}
11431 
11432 	if (irq_save) {
11433 		if (!is_irq_flag_reg_valid_uninit(env, reg)) {
11434 			verbose(env, "expected uninitialized irq flag as %s\n",
11435 				reg_arg_name(env, argno));
11436 			return -EINVAL;
11437 		}
11438 
11439 		err = check_mem_access(env, env->insn_idx, reg, argno, 0, BPF_DW,
11440 				       BPF_WRITE, -1, false, false);
11441 		if (err)
11442 			return err;
11443 
11444 		err = mark_stack_slot_irq_flag(env, meta, reg, env->insn_idx, kfunc_class);
11445 		if (err)
11446 			return err;
11447 	} else {
11448 		err = is_irq_flag_reg_valid_init(env, reg);
11449 		if (err) {
11450 			verbose(env, "expected an initialized irq flag as %s\n",
11451 				reg_arg_name(env, argno));
11452 			return err;
11453 		}
11454 
11455 		spi = irq_flag_get_spi(env, reg);
11456 		if (spi < 0)
11457 			return spi;
11458 
11459 		mark_stack_slots_scratched(env, spi, 1);
11460 
11461 		err = unmark_stack_slot_irq_flag(env, reg, kfunc_class);
11462 		if (err)
11463 			return err;
11464 	}
11465 	return 0;
11466 }
11467 
11468 
11469 static int ref_set_non_owning(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
11470 {
11471 	struct btf_record *rec = reg_btf_record(reg);
11472 
11473 	if (!env->cur_state->active_locks) {
11474 		verifier_bug(env, "%s w/o active lock", __func__);
11475 		return -EFAULT;
11476 	}
11477 
11478 	if (type_flag(reg->type) & NON_OWN_REF) {
11479 		verifier_bug(env, "NON_OWN_REF already set");
11480 		return -EFAULT;
11481 	}
11482 
11483 	reg->type |= NON_OWN_REF;
11484 	if (rec->refcount_off >= 0)
11485 		reg->type |= MEM_RCU;
11486 
11487 	return 0;
11488 }
11489 
11490 static void ref_convert_owning_non_owning(struct bpf_verifier_env *env, u32 id)
11491 {
11492 	struct bpf_func_state *unused;
11493 	struct bpf_reg_state *reg;
11494 
11495 	WARN_ON_ONCE(release_reference_nomark(env->cur_state, id));
11496 
11497 	bpf_for_each_reg_in_vstate(env->cur_state, unused, reg, ({
11498 		if (reg->id == id) {
11499 			reg->id = 0;
11500 			ref_set_non_owning(env, reg);
11501 		}
11502 	}));
11503 
11504 	return;
11505 }
11506 
11507 /* Implementation details:
11508  *
11509  * Each register points to some region of memory, which we define as an
11510  * allocation. Each allocation may embed a bpf_spin_lock which protects any
11511  * special BPF objects (bpf_list_head, bpf_rb_root, etc.) part of the same
11512  * allocation. The lock and the data it protects are colocated in the same
11513  * memory region.
11514  *
 * Hence, every time a register holds a pointer value pointing to such
 * allocation, the verifier preserves a unique reg->id for it.
11517  *
11518  * The verifier remembers the lock 'ptr' and the lock 'id' whenever
11519  * bpf_spin_lock is called.
11520  *
11521  * To enable this, lock state in the verifier captures two values:
11522  *	active_lock.ptr = Register's type specific pointer
11523  *	active_lock.id  = A unique ID for each register pointer value
11524  *
11525  * Currently, PTR_TO_MAP_VALUE and PTR_TO_BTF_ID | MEM_ALLOC are the two
11526  * supported register types.
11527  *
11528  * The active_lock.ptr in case of map values is the reg->map_ptr, and in case of
11529  * allocated objects is the reg->btf pointer.
11530  *
11531  * The active_lock.id is non-unique for maps supporting direct_value_addr, as we
11532  * can establish the provenance of the map value statically for each distinct
11533  * lookup into such maps. They always contain a single map value hence unique
11534  * IDs for each pseudo load pessimizes the algorithm and rejects valid programs.
11535  *
11536  * So, in case of global variables, they use array maps with max_entries = 1,
11537  * hence their active_lock.ptr becomes map_ptr and id = 0 (since they all point
11538  * into the same map value as max_entries is 1, as described above).
11539  *
11540  * In case of inner map lookups, the inner map pointer has same map_ptr as the
11541  * outer map pointer (in verifier context), but each lookup into an inner map
11542  * assigns a fresh reg->id to the lookup, so while lookups into distinct inner
11543  * maps from the same outer map share the same map_ptr as active_lock.ptr, they
11544  * will get different reg->id assigned to each lookup, hence different
11545  * active_lock.id.
11546  *
11547  * In case of allocated objects, active_lock.ptr is the reg->btf, and the
11548  * reg->id is a unique ID preserved after the NULL pointer check on the pointer
11549  * returned from bpf_obj_new. Each allocation receives a new reg->id.
11550  */
11551 static int check_reg_allocation_locked(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
11552 {
11553 	struct bpf_reference_state *s;
11554 	void *ptr;
11555 	u32 id;
11556 
11557 	switch ((int)reg->type) {
11558 	case PTR_TO_MAP_VALUE:
11559 		ptr = reg->map_ptr;
11560 		break;
11561 	case PTR_TO_BTF_ID | MEM_ALLOC:
11562 		ptr = reg->btf;
11563 		break;
11564 	default:
11565 		verifier_bug(env, "unknown reg type for lock check");
11566 		return -EFAULT;
11567 	}
11568 	id = reg->id;
11569 
11570 	if (!env->cur_state->active_locks)
11571 		return -EINVAL;
11572 	s = find_lock_state(env->cur_state, REF_TYPE_LOCK_MASK, id, ptr);
11573 	if (!s) {
11574 		verbose(env, "held lock and object are not in the same allocation\n");
11575 		return -EINVAL;
11576 	}
11577 	return 0;
11578 }
11579 
11580 static bool is_bpf_list_api_kfunc(u32 btf_id)
11581 {
11582 	return is_bpf_list_push_kfunc(btf_id) ||
11583 	       btf_id == special_kfunc_list[KF_bpf_list_pop_front] ||
11584 	       btf_id == special_kfunc_list[KF_bpf_list_pop_back] ||
11585 	       btf_id == special_kfunc_list[KF_bpf_list_del] ||
11586 	       btf_id == special_kfunc_list[KF_bpf_list_front] ||
11587 	       btf_id == special_kfunc_list[KF_bpf_list_back] ||
11588 	       btf_id == special_kfunc_list[KF_bpf_list_is_first] ||
11589 	       btf_id == special_kfunc_list[KF_bpf_list_is_last] ||
11590 	       btf_id == special_kfunc_list[KF_bpf_list_empty];
11591 }
11592 
11593 static bool is_bpf_rbtree_api_kfunc(u32 btf_id)
11594 {
11595 	return is_bpf_rbtree_add_kfunc(btf_id) ||
11596 	       btf_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
11597 	       btf_id == special_kfunc_list[KF_bpf_rbtree_first] ||
11598 	       btf_id == special_kfunc_list[KF_bpf_rbtree_root] ||
11599 	       btf_id == special_kfunc_list[KF_bpf_rbtree_left] ||
11600 	       btf_id == special_kfunc_list[KF_bpf_rbtree_right];
11601 }
11602 
11603 static bool is_bpf_iter_num_api_kfunc(u32 btf_id)
11604 {
11605 	return btf_id == special_kfunc_list[KF_bpf_iter_num_new] ||
11606 	       btf_id == special_kfunc_list[KF_bpf_iter_num_next] ||
11607 	       btf_id == special_kfunc_list[KF_bpf_iter_num_destroy];
11608 }
11609 
11610 static bool is_bpf_graph_api_kfunc(u32 btf_id)
11611 {
11612 	return is_bpf_list_api_kfunc(btf_id) ||
11613 	       is_bpf_rbtree_api_kfunc(btf_id) ||
11614 	       is_bpf_refcount_acquire_kfunc(btf_id);
11615 }
11616 
11617 static bool is_bpf_res_spin_lock_kfunc(u32 btf_id)
11618 {
11619 	return btf_id == special_kfunc_list[KF_bpf_res_spin_lock] ||
11620 	       btf_id == special_kfunc_list[KF_bpf_res_spin_unlock] ||
11621 	       btf_id == special_kfunc_list[KF_bpf_res_spin_lock_irqsave] ||
11622 	       btf_id == special_kfunc_list[KF_bpf_res_spin_unlock_irqrestore];
11623 }
11624 
11625 static bool is_bpf_arena_kfunc(u32 btf_id)
11626 {
11627 	return btf_id == special_kfunc_list[KF_bpf_arena_alloc_pages] ||
11628 	       btf_id == special_kfunc_list[KF_bpf_arena_free_pages] ||
11629 	       btf_id == special_kfunc_list[KF_bpf_arena_reserve_pages];
11630 }
11631 
11632 static bool is_bpf_stream_kfunc(u32 btf_id)
11633 {
11634 	return btf_id == special_kfunc_list[KF_bpf_stream_vprintk] ||
11635 	       btf_id == special_kfunc_list[KF_bpf_stream_print_stack];
11636 }
11637 
11638 static bool kfunc_spin_allowed(u32 btf_id)
11639 {
11640 	return is_bpf_graph_api_kfunc(btf_id) || is_bpf_iter_num_api_kfunc(btf_id) ||
11641 	       is_bpf_res_spin_lock_kfunc(btf_id) || is_bpf_arena_kfunc(btf_id) ||
11642 	       is_bpf_stream_kfunc(btf_id);
11643 }
11644 
11645 static bool is_sync_callback_calling_kfunc(u32 btf_id)
11646 {
11647 	return is_bpf_rbtree_add_kfunc(btf_id);
11648 }
11649 
11650 static bool is_async_callback_calling_kfunc(u32 btf_id)
11651 {
11652 	return is_bpf_wq_set_callback_kfunc(btf_id) ||
11653 	       is_task_work_add_kfunc(btf_id);
11654 }
11655 
11656 bool bpf_is_throw_kfunc(struct bpf_insn *insn)
11657 {
11658 	return bpf_pseudo_kfunc_call(insn) && insn->off == 0 &&
11659 	       insn->imm == special_kfunc_list[KF_bpf_throw];
11660 }
11661 
11662 static bool is_bpf_wq_set_callback_kfunc(u32 btf_id)
11663 {
11664 	return btf_id == special_kfunc_list[KF_bpf_wq_set_callback];
11665 }
11666 
11667 static bool is_callback_calling_kfunc(u32 btf_id)
11668 {
11669 	return is_sync_callback_calling_kfunc(btf_id) ||
11670 	       is_async_callback_calling_kfunc(btf_id);
11671 }
11672 
11673 static bool is_rbtree_lock_required_kfunc(u32 btf_id)
11674 {
11675 	return is_bpf_rbtree_api_kfunc(btf_id);
11676 }
11677 
11678 static bool check_kfunc_is_graph_root_api(struct bpf_verifier_env *env,
11679 					  enum btf_field_type head_field_type,
11680 					  u32 kfunc_btf_id)
11681 {
11682 	bool ret;
11683 
11684 	switch (head_field_type) {
11685 	case BPF_LIST_HEAD:
11686 		ret = is_bpf_list_api_kfunc(kfunc_btf_id);
11687 		break;
11688 	case BPF_RB_ROOT:
11689 		ret = is_bpf_rbtree_api_kfunc(kfunc_btf_id);
11690 		break;
11691 	default:
11692 		verbose(env, "verifier internal error: unexpected graph root argument type %s\n",
11693 			btf_field_type_name(head_field_type));
11694 		return false;
11695 	}
11696 
11697 	if (!ret)
11698 		verbose(env, "verifier internal error: %s head arg for unknown kfunc\n",
11699 			btf_field_type_name(head_field_type));
11700 	return ret;
11701 }
11702 
11703 static bool check_kfunc_is_graph_node_api(struct bpf_verifier_env *env,
11704 					  enum btf_field_type node_field_type,
11705 					  u32 kfunc_btf_id)
11706 {
11707 	bool ret;
11708 
11709 	switch (node_field_type) {
11710 	case BPF_LIST_NODE:
11711 		ret = is_bpf_list_push_kfunc(kfunc_btf_id) ||
11712 		      kfunc_btf_id == special_kfunc_list[KF_bpf_list_del] ||
11713 		      kfunc_btf_id == special_kfunc_list[KF_bpf_list_is_first] ||
11714 		      kfunc_btf_id == special_kfunc_list[KF_bpf_list_is_last];
11715 		break;
11716 	case BPF_RB_NODE:
11717 		ret = (is_bpf_rbtree_add_kfunc(kfunc_btf_id) ||
11718 		       kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
11719 		       kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_left] ||
11720 		       kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_right]);
11721 		break;
11722 	default:
11723 		verbose(env, "verifier internal error: unexpected graph node argument type %s\n",
11724 			btf_field_type_name(node_field_type));
11725 		return false;
11726 	}
11727 
11728 	if (!ret)
11729 		verbose(env, "verifier internal error: %s node arg for unknown kfunc\n",
11730 			btf_field_type_name(node_field_type));
11731 	return ret;
11732 }
11733 
11734 static int
11735 __process_kf_arg_ptr_to_graph_root(struct bpf_verifier_env *env,
11736 				   struct bpf_reg_state *reg, argno_t argno,
11737 				   struct bpf_kfunc_call_arg_meta *meta,
11738 				   enum btf_field_type head_field_type,
11739 				   struct btf_field **head_field)
11740 {
11741 	const char *head_type_name;
11742 	struct btf_field *field;
11743 	struct btf_record *rec;
11744 	u32 head_off;
11745 
11746 	if (meta->btf != btf_vmlinux) {
11747 		verifier_bug(env, "unexpected btf mismatch in kfunc call");
11748 		return -EFAULT;
11749 	}
11750 
11751 	if (!check_kfunc_is_graph_root_api(env, head_field_type, meta->func_id))
11752 		return -EFAULT;
11753 
11754 	head_type_name = btf_field_type_name(head_field_type);
11755 	if (!tnum_is_const(reg->var_off)) {
11756 		verbose(env,
11757 			"%s doesn't have constant offset. %s has to be at the constant offset\n",
11758 			reg_arg_name(env, argno), head_type_name);
11759 		return -EINVAL;
11760 	}
11761 
11762 	rec = reg_btf_record(reg);
11763 	head_off = reg->var_off.value;
11764 	field = btf_record_find(rec, head_off, head_field_type);
11765 	if (!field) {
11766 		verbose(env, "%s not found at offset=%u\n", head_type_name, head_off);
11767 		return -EINVAL;
11768 	}
11769 
11770 	/* All functions require bpf_list_head to be protected using a bpf_spin_lock */
11771 	if (check_reg_allocation_locked(env, reg)) {
11772 		verbose(env, "bpf_spin_lock at off=%d must be held for %s\n",
11773 			rec->spin_lock_off, head_type_name);
11774 		return -EINVAL;
11775 	}
11776 
11777 	if (*head_field) {
11778 		verifier_bug(env, "repeating %s arg", head_type_name);
11779 		return -EFAULT;
11780 	}
11781 	*head_field = field;
11782 	return 0;
11783 }
11784 
11785 static int process_kf_arg_ptr_to_list_head(struct bpf_verifier_env *env,
11786 					   struct bpf_reg_state *reg, argno_t argno,
11787 					   struct bpf_kfunc_call_arg_meta *meta)
11788 {
11789 	return __process_kf_arg_ptr_to_graph_root(env, reg, argno, meta, BPF_LIST_HEAD,
11790 							  &meta->arg_list_head.field);
11791 }
11792 
11793 static int process_kf_arg_ptr_to_rbtree_root(struct bpf_verifier_env *env,
11794 					     struct bpf_reg_state *reg, argno_t argno,
11795 					     struct bpf_kfunc_call_arg_meta *meta)
11796 {
11797 	return __process_kf_arg_ptr_to_graph_root(env, reg, argno, meta, BPF_RB_ROOT,
11798 							  &meta->arg_rbtree_root.field);
11799 }
11800 
11801 static int
11802 __process_kf_arg_ptr_to_graph_node(struct bpf_verifier_env *env,
11803 				   struct bpf_reg_state *reg, argno_t argno,
11804 				   struct bpf_kfunc_call_arg_meta *meta,
11805 				   enum btf_field_type head_field_type,
11806 				   enum btf_field_type node_field_type,
11807 				   struct btf_field **node_field)
11808 {
11809 	const char *node_type_name;
11810 	const struct btf_type *et, *t;
11811 	struct btf_field *field;
11812 	u32 node_off;
11813 
11814 	if (meta->btf != btf_vmlinux) {
11815 		verifier_bug(env, "unexpected btf mismatch in kfunc call");
11816 		return -EFAULT;
11817 	}
11818 
11819 	if (!check_kfunc_is_graph_node_api(env, node_field_type, meta->func_id))
11820 		return -EFAULT;
11821 
11822 	node_type_name = btf_field_type_name(node_field_type);
11823 	if (!tnum_is_const(reg->var_off)) {
11824 		verbose(env,
11825 			"%s doesn't have constant offset. %s has to be at the constant offset\n",
11826 			reg_arg_name(env, argno), node_type_name);
11827 		return -EINVAL;
11828 	}
11829 
11830 	node_off = reg->var_off.value;
11831 	field = reg_find_field_offset(reg, node_off, node_field_type);
11832 	if (!field) {
11833 		verbose(env, "%s not found at offset=%u\n", node_type_name, node_off);
11834 		return -EINVAL;
11835 	}
11836 
11837 	field = *node_field;
11838 
11839 	et = btf_type_by_id(field->graph_root.btf, field->graph_root.value_btf_id);
11840 	t = btf_type_by_id(reg->btf, reg->btf_id);
11841 	if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, 0, field->graph_root.btf,
11842 				  field->graph_root.value_btf_id, true)) {
11843 		verbose(env, "operation on %s expects arg#1 %s at offset=%d "
11844 			"in struct %s, but arg is at offset=%d in struct %s\n",
11845 			btf_field_type_name(head_field_type),
11846 			btf_field_type_name(node_field_type),
11847 			field->graph_root.node_offset,
11848 			btf_name_by_offset(field->graph_root.btf, et->name_off),
11849 			node_off, btf_name_by_offset(reg->btf, t->name_off));
11850 		return -EINVAL;
11851 	}
11852 	meta->arg_btf = reg->btf;
11853 	meta->arg_btf_id = reg->btf_id;
11854 
11855 	if (node_off != field->graph_root.node_offset) {
11856 		verbose(env, "arg#1 offset=%d, but expected %s at offset=%d in struct %s\n",
11857 			node_off, btf_field_type_name(node_field_type),
11858 			field->graph_root.node_offset,
11859 			btf_name_by_offset(field->graph_root.btf, et->name_off));
11860 		return -EINVAL;
11861 	}
11862 
11863 	return 0;
11864 }
11865 
11866 static int process_kf_arg_ptr_to_list_node(struct bpf_verifier_env *env,
11867 					   struct bpf_reg_state *reg, argno_t argno,
11868 					   struct bpf_kfunc_call_arg_meta *meta)
11869 {
11870 	return __process_kf_arg_ptr_to_graph_node(env, reg, argno, meta,
11871 						  BPF_LIST_HEAD, BPF_LIST_NODE,
11872 						  &meta->arg_list_head.field);
11873 }
11874 
11875 static int process_kf_arg_ptr_to_rbtree_node(struct bpf_verifier_env *env,
11876 					     struct bpf_reg_state *reg, argno_t argno,
11877 					     struct bpf_kfunc_call_arg_meta *meta)
11878 {
11879 	return __process_kf_arg_ptr_to_graph_node(env, reg, argno, meta,
11880 						  BPF_RB_ROOT, BPF_RB_NODE,
11881 						  &meta->arg_rbtree_root.field);
11882 }
11883 
11884 /*
11885  * css_task iter allowlist is needed to avoid dead locking on css_set_lock.
11886  * LSM hooks and iters (both sleepable and non-sleepable) are safe.
11887  * Any sleepable progs are also safe since bpf_check_attach_target() enforce
11888  * them can only be attached to some specific hook points.
11889  */
11890 static bool check_css_task_iter_allowlist(struct bpf_verifier_env *env)
11891 {
11892 	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
11893 
11894 	switch (prog_type) {
11895 	case BPF_PROG_TYPE_LSM:
11896 		return true;
11897 	case BPF_PROG_TYPE_TRACING:
11898 		if (env->prog->expected_attach_type == BPF_TRACE_ITER)
11899 			return true;
11900 		fallthrough;
11901 	default:
11902 		return in_sleepable(env);
11903 	}
11904 }
11905 
11906 static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_arg_meta *meta,
11907 			    int insn_idx)
11908 {
11909 	const char *func_name = meta->func_name, *ref_tname;
11910 	struct bpf_func_state *caller = cur_func(env);
11911 	struct bpf_reg_state *regs = cur_regs(env);
11912 	const struct btf *btf = meta->btf;
11913 	const struct btf_param *args;
11914 	struct btf_record *rec;
11915 	u32 i, nargs;
11916 	int ret;
11917 
11918 	args = (const struct btf_param *)(meta->func_proto + 1);
11919 	nargs = btf_type_vlen(meta->func_proto);
11920 	if (nargs > MAX_BPF_FUNC_ARGS) {
11921 		verbose(env, "Function %s has %d > %d args\n", func_name, nargs,
11922 			MAX_BPF_FUNC_ARGS);
11923 		return -EINVAL;
11924 	}
11925 	if (nargs > MAX_BPF_FUNC_REG_ARGS && !bpf_jit_supports_stack_args()) {
11926 		verbose(env, "JIT does not support kfunc %s() with %d args\n",
11927 			func_name, nargs);
11928 		return -ENOTSUPP;
11929 	}
11930 
11931 	ret = check_outgoing_stack_args(env, caller, nargs);
11932 	if (ret)
11933 		return ret;
11934 
11935 	/* Check that BTF function arguments match actual types that the
11936 	 * verifier sees.
11937 	 */
11938 	for (i = 0; i < nargs; i++) {
11939 		struct bpf_reg_state *reg = get_func_arg_reg(caller, regs, i);
11940 		const struct btf_type *t, *ref_t, *resolve_ret;
11941 		enum bpf_arg_type arg_type = ARG_DONTCARE;
11942 		argno_t argno = argno_from_arg(i + 1);
11943 		int regno = reg_from_argno(argno);
11944 		u32 ref_id, type_size;
11945 		bool is_ret_buf_sz = false;
11946 		int kf_arg_type;
11947 
11948 		if (is_kfunc_arg_prog_aux(btf, &args[i])) {
11949 			/* Reject repeated use bpf_prog_aux */
11950 			if (meta->arg_prog) {
11951 				verifier_bug(env, "Only 1 prog->aux argument supported per-kfunc");
11952 				return -EFAULT;
11953 			}
11954 			if (regno < 0) {
11955 				verbose(env, "%s prog->aux cannot be a stack argument\n",
11956 					reg_arg_name(env, argno));
11957 				return -EINVAL;
11958 			}
11959 			meta->arg_prog = true;
11960 			cur_aux(env)->arg_prog = regno;
11961 			continue;
11962 		}
11963 
11964 		if (is_kfunc_arg_ignore(btf, &args[i]) || is_kfunc_arg_implicit(meta, i))
11965 			continue;
11966 
11967 		t = btf_type_skip_modifiers(btf, args[i].type, NULL);
11968 
11969 		if (btf_type_is_scalar(t)) {
11970 			if (reg->type != SCALAR_VALUE) {
11971 				verbose(env, "%s is not a scalar\n", reg_arg_name(env, argno));
11972 				return -EINVAL;
11973 			}
11974 
11975 			if (is_kfunc_arg_constant(meta->btf, &args[i])) {
11976 				if (meta->arg_constant.found) {
11977 					verifier_bug(env, "only one constant argument permitted");
11978 					return -EFAULT;
11979 				}
11980 				if (!tnum_is_const(reg->var_off)) {
11981 					verbose(env, "%s must be a known constant\n",
11982 						reg_arg_name(env, argno));
11983 					return -EINVAL;
11984 				}
11985 				if (regno >= 0)
11986 					ret = mark_chain_precision(env, regno);
11987 				else
11988 					ret = mark_stack_arg_precision(env, i);
11989 				if (ret < 0)
11990 					return ret;
11991 				meta->arg_constant.found = true;
11992 				meta->arg_constant.value = reg->var_off.value;
11993 			} else if (is_kfunc_arg_scalar_with_name(btf, &args[i], "rdonly_buf_size")) {
11994 				meta->r0_rdonly = true;
11995 				is_ret_buf_sz = true;
11996 			} else if (is_kfunc_arg_scalar_with_name(btf, &args[i], "rdwr_buf_size")) {
11997 				is_ret_buf_sz = true;
11998 			}
11999 
12000 			if (is_ret_buf_sz) {
12001 				if (meta->r0_size) {
12002 					verbose(env, "2 or more rdonly/rdwr_buf_size parameters for kfunc");
12003 					return -EINVAL;
12004 				}
12005 
12006 				if (!tnum_is_const(reg->var_off)) {
12007 					verbose(env, "%s is not a const\n",
12008 						reg_arg_name(env, argno));
12009 					return -EINVAL;
12010 				}
12011 
12012 				meta->r0_size = reg->var_off.value;
12013 				if (regno >= 0)
12014 					ret = mark_chain_precision(env, regno);
12015 				else
12016 					ret = mark_stack_arg_precision(env, i);
12017 				if (ret)
12018 					return ret;
12019 			}
12020 			continue;
12021 		}
12022 
12023 		if (!btf_type_is_ptr(t)) {
12024 			verbose(env, "Unrecognized %s type %s\n",
12025 				reg_arg_name(env, argno), btf_type_str(t));
12026 			return -EINVAL;
12027 		}
12028 
12029 		if ((bpf_register_is_null(reg) || type_may_be_null(reg->type)) &&
12030 		    !is_kfunc_arg_nullable(meta->btf, &args[i])) {
12031 			verbose(env, "Possibly NULL pointer passed to trusted %s\n",
12032 				reg_arg_name(env, argno));
12033 			return -EACCES;
12034 		}
12035 
12036 		if (regno == meta->release_regno && !is_kfunc_arg_dynptr(meta->btf, &args[i]) &&
12037 		    !reg_is_referenced(env, reg) && !bpf_register_is_null(reg)) {
12038 			verbose(env, "release kfunc %s expects referenced PTR_TO_BTF_ID passed to %s\n",
12039 				func_name, reg_arg_name(env, argno));
12040 			return -EINVAL;
12041 		}
12042 
12043 		if (reg_is_referenced(env, reg))
12044 			update_ref_obj(&meta->ref_obj, reg);
12045 
12046 		ref_t = btf_type_skip_modifiers(btf, t->type, &ref_id);
12047 		ref_tname = btf_name_by_offset(btf, ref_t->name_off);
12048 
12049 		kf_arg_type = get_kfunc_ptr_arg_type(env, caller, regs, meta, t, ref_t, ref_tname,
12050 						     args, i, nargs, argno, reg);
12051 		if (kf_arg_type < 0)
12052 			return kf_arg_type;
12053 
12054 		switch (kf_arg_type) {
12055 		case KF_ARG_PTR_TO_NULL:
12056 			continue;
12057 		case KF_ARG_PTR_TO_MAP:
12058 			if (!reg->map_ptr) {
12059 				verbose(env, "pointer in %s isn't map pointer\n",
12060 					reg_arg_name(env, argno));
12061 				return -EINVAL;
12062 			}
12063 			if (meta->map.ptr && (reg->map_ptr->record->wq_off >= 0 ||
12064 					      reg->map_ptr->record->task_work_off >= 0)) {
12065 				/* Use map_uid (which is unique id of inner map) to reject:
12066 				 * inner_map1 = bpf_map_lookup_elem(outer_map, key1)
12067 				 * inner_map2 = bpf_map_lookup_elem(outer_map, key2)
12068 				 * if (inner_map1 && inner_map2) {
12069 				 *     wq = bpf_map_lookup_elem(inner_map1);
12070 				 *     if (wq)
12071 				 *         // mismatch would have been allowed
12072 				 *         bpf_wq_init(wq, inner_map2);
12073 				 * }
12074 				 *
12075 				 * Comparing map_ptr is enough to distinguish normal and outer maps.
12076 				 */
12077 				if (meta->map.ptr != reg->map_ptr ||
12078 				    meta->map.uid != reg->map_uid) {
12079 					if (reg->map_ptr->record->task_work_off >= 0) {
12080 						verbose(env,
12081 							"bpf_task_work pointer in R2 map_uid=%d doesn't match map pointer in R3 map_uid=%d\n",
12082 							meta->map.uid, reg->map_uid);
12083 						return -EINVAL;
12084 					}
12085 					verbose(env,
12086 						"workqueue pointer in R1 map_uid=%d doesn't match map pointer in R2 map_uid=%d\n",
12087 						meta->map.uid, reg->map_uid);
12088 					return -EINVAL;
12089 				}
12090 			}
12091 			meta->map.ptr = reg->map_ptr;
12092 			meta->map.uid = reg->map_uid;
12093 			fallthrough;
12094 		case KF_ARG_PTR_TO_ALLOC_BTF_ID:
12095 		case KF_ARG_PTR_TO_BTF_ID:
12096 			if (!is_trusted_reg(env, reg)) {
12097 				if (!is_kfunc_rcu(meta)) {
12098 					verbose(env, "%s must be referenced or trusted\n",
12099 						reg_arg_name(env, argno));
12100 					return -EINVAL;
12101 				}
12102 				if (!is_rcu_reg(reg)) {
12103 					verbose(env, "%s must be a rcu pointer\n",
12104 						reg_arg_name(env, argno));
12105 					return -EINVAL;
12106 				}
12107 			}
12108 			fallthrough;
12109 		case KF_ARG_PTR_TO_ITER:
12110 		case KF_ARG_PTR_TO_LIST_HEAD:
12111 		case KF_ARG_PTR_TO_LIST_NODE:
12112 		case KF_ARG_PTR_TO_RB_ROOT:
12113 		case KF_ARG_PTR_TO_RB_NODE:
12114 		case KF_ARG_PTR_TO_MEM:
12115 		case KF_ARG_PTR_TO_MEM_SIZE:
12116 		case KF_ARG_PTR_TO_CALLBACK:
12117 		case KF_ARG_PTR_TO_REFCOUNTED_KPTR:
12118 		case KF_ARG_PTR_TO_CONST_STR:
12119 		case KF_ARG_PTR_TO_WORKQUEUE:
12120 		case KF_ARG_PTR_TO_TIMER:
12121 		case KF_ARG_PTR_TO_TASK_WORK:
12122 		case KF_ARG_PTR_TO_IRQ_FLAG:
12123 		case KF_ARG_PTR_TO_RES_SPIN_LOCK:
12124 			break;
12125 		case KF_ARG_PTR_TO_DYNPTR:
12126 			arg_type = ARG_PTR_TO_DYNPTR;
12127 			break;
12128 		case KF_ARG_PTR_TO_CTX:
12129 			arg_type = ARG_PTR_TO_CTX;
12130 			break;
12131 		default:
12132 			verifier_bug(env, "unknown kfunc arg type %d", kf_arg_type);
12133 			return -EFAULT;
12134 		}
12135 
12136 		if (regno == meta->release_regno)
12137 			arg_type |= OBJ_RELEASE;
12138 		ret = check_func_arg_reg_off(env, reg, argno, arg_type);
12139 		if (ret < 0)
12140 			return ret;
12141 
12142 		switch (kf_arg_type) {
12143 		case KF_ARG_PTR_TO_CTX:
12144 			if (reg->type != PTR_TO_CTX) {
12145 				verbose(env, "%s expected pointer to ctx, but got %s\n",
12146 					reg_arg_name(env, argno), reg_type_str(env, reg->type));
12147 				return -EINVAL;
12148 			}
12149 
12150 			if (meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx]) {
12151 				ret = get_kern_ctx_btf_id(&env->log, resolve_prog_type(env->prog));
12152 				if (ret < 0)
12153 					return -EINVAL;
12154 				meta->ret_btf_id  = ret;
12155 			}
12156 			break;
12157 		case KF_ARG_PTR_TO_ALLOC_BTF_ID:
12158 			if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC)) {
12159 				if (!is_bpf_obj_drop_kfunc(meta->func_id)) {
12160 					verbose(env, "%s expected for bpf_obj_drop()\n",
12161 						reg_arg_name(env, argno));
12162 					return -EINVAL;
12163 				}
12164 			} else if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC | MEM_PERCPU)) {
12165 				if (!is_bpf_percpu_obj_drop_kfunc(meta->func_id)) {
12166 					verbose(env, "%s expected for bpf_percpu_obj_drop()\n",
12167 						reg_arg_name(env, argno));
12168 					return -EINVAL;
12169 				}
12170 			} else {
12171 				verbose(env, "%s expected pointer to allocated object\n",
12172 					reg_arg_name(env, argno));
12173 				return -EINVAL;
12174 			}
12175 			if (!reg_is_referenced(env, reg)) {
12176 				verbose(env, "allocated object must be referenced\n");
12177 				return -EINVAL;
12178 			}
12179 			if (meta->btf == btf_vmlinux) {
12180 				meta->arg_btf = reg->btf;
12181 				meta->arg_btf_id = reg->btf_id;
12182 			}
12183 			break;
12184 		case KF_ARG_PTR_TO_DYNPTR:
12185 		{
12186 			enum bpf_arg_type dynptr_arg_type = ARG_PTR_TO_DYNPTR;
12187 
12188 			if (is_kfunc_arg_uninit(btf, &args[i]))
12189 				dynptr_arg_type |= MEM_UNINIT;
12190 
12191 			if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_skb]) {
12192 				dynptr_arg_type |= DYNPTR_TYPE_SKB;
12193 			} else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_xdp]) {
12194 				dynptr_arg_type |= DYNPTR_TYPE_XDP;
12195 			} else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_skb_meta]) {
12196 				dynptr_arg_type |= DYNPTR_TYPE_SKB_META;
12197 			} else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_file]) {
12198 				dynptr_arg_type |= DYNPTR_TYPE_FILE;
12199 			} else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_file_discard]) {
12200 				dynptr_arg_type |= DYNPTR_TYPE_FILE | OBJ_RELEASE;
12201 			} else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_clone] &&
12202 				   (dynptr_arg_type & MEM_UNINIT)) {
12203 				enum bpf_dynptr_type parent_type = meta->dynptr.type;
12204 
12205 				if (parent_type == BPF_DYNPTR_TYPE_INVALID) {
12206 					verifier_bug(env, "no dynptr type for parent of clone");
12207 					return -EFAULT;
12208 				}
12209 
12210 				dynptr_arg_type |= (unsigned int)get_dynptr_type_flag(parent_type);
12211 			}
12212 
12213 			ret = process_dynptr_func(env, reg, argno, insn_idx, dynptr_arg_type,
12214 						  &meta->ref_obj, &meta->dynptr);
12215 			if (ret < 0)
12216 				return ret;
12217 			break;
12218 		}
12219 		case KF_ARG_PTR_TO_ITER:
12220 			if (meta->func_id == special_kfunc_list[KF_bpf_iter_css_task_new]) {
12221 				if (!check_css_task_iter_allowlist(env)) {
12222 					verbose(env, "css_task_iter is only allowed in bpf_lsm, bpf_iter and sleepable progs\n");
12223 					return -EINVAL;
12224 				}
12225 			}
12226 			ret = process_iter_arg(env, reg, argno, insn_idx, meta);
12227 			if (ret < 0)
12228 				return ret;
12229 			break;
12230 		case KF_ARG_PTR_TO_LIST_HEAD:
12231 			if (reg->type != PTR_TO_MAP_VALUE &&
12232 			    reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
12233 				verbose(env, "%s expected pointer to map value or allocated object\n",
12234 					reg_arg_name(env, argno));
12235 				return -EINVAL;
12236 			}
12237 			if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC) &&
12238 			    !reg_is_referenced(env, reg)) {
12239 				verbose(env, "allocated object must be referenced\n");
12240 				return -EINVAL;
12241 			}
12242 			ret = process_kf_arg_ptr_to_list_head(env, reg, argno, meta);
12243 			if (ret < 0)
12244 				return ret;
12245 			break;
12246 		case KF_ARG_PTR_TO_RB_ROOT:
12247 			if (reg->type != PTR_TO_MAP_VALUE &&
12248 			    reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
12249 				verbose(env, "%s expected pointer to map value or allocated object\n",
12250 					reg_arg_name(env, argno));
12251 				return -EINVAL;
12252 			}
12253 			if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC) &&
12254 			    !reg_is_referenced(env, reg)) {
12255 				verbose(env, "allocated object must be referenced\n");
12256 				return -EINVAL;
12257 			}
12258 			ret = process_kf_arg_ptr_to_rbtree_root(env, reg, argno, meta);
12259 			if (ret < 0)
12260 				return ret;
12261 			break;
12262 		case KF_ARG_PTR_TO_LIST_NODE:
12263 			if (is_kfunc_arg_nonown_allowed(btf, &args[i]) &&
12264 			    type_is_non_owning_ref(reg->type) && !reg_is_referenced(env, reg)) {
12265 				/* Allow bpf_list_front/back return value for
12266 				 * __nonown_allowed list-node arguments.
12267 				 */
12268 				goto check_ok;
12269 			}
12270 			if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
12271 				verbose(env, "%s expected pointer to allocated object\n",
12272 					reg_arg_name(env, argno));
12273 				return -EINVAL;
12274 			}
12275 			if (!reg_is_referenced(env, reg)) {
12276 				verbose(env, "allocated object must be referenced\n");
12277 				return -EINVAL;
12278 			}
12279 check_ok:
12280 			ret = process_kf_arg_ptr_to_list_node(env, reg, argno, meta);
12281 			if (ret < 0)
12282 				return ret;
12283 			break;
12284 		case KF_ARG_PTR_TO_RB_NODE:
12285 			if (is_bpf_rbtree_add_kfunc(meta->func_id)) {
12286 				if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
12287 					verbose(env, "%s expected pointer to allocated object\n",
12288 						reg_arg_name(env, argno));
12289 					return -EINVAL;
12290 				}
12291 				if (!reg_is_referenced(env, reg)) {
12292 					verbose(env, "allocated object must be referenced\n");
12293 					return -EINVAL;
12294 				}
12295 			} else {
12296 				if (!type_is_non_owning_ref(reg->type) &&
12297 				    !reg_is_referenced(env, reg)) {
12298 					verbose(env, "%s can only take non-owning or refcounted bpf_rb_node pointer\n", func_name);
12299 					return -EINVAL;
12300 				}
12301 				if (in_rbtree_lock_required_cb(env)) {
12302 					verbose(env, "%s not allowed in rbtree cb\n", func_name);
12303 					return -EINVAL;
12304 				}
12305 			}
12306 
12307 			ret = process_kf_arg_ptr_to_rbtree_node(env, reg, argno, meta);
12308 			if (ret < 0)
12309 				return ret;
12310 			break;
12311 		case KF_ARG_PTR_TO_MAP:
12312 			/* If argument has '__map' suffix expect 'struct bpf_map *' */
12313 			ref_id = *reg2btf_ids[CONST_PTR_TO_MAP];
12314 			ref_t = btf_type_by_id(btf_vmlinux, ref_id);
12315 			ref_tname = btf_name_by_offset(btf, ref_t->name_off);
12316 			fallthrough;
12317 		case KF_ARG_PTR_TO_BTF_ID:
12318 			/* Only base_type is checked, further checks are done here */
12319 			if ((base_type(reg->type) != PTR_TO_BTF_ID ||
12320 			     (bpf_type_has_unsafe_modifiers(reg->type) && !is_rcu_reg(reg))) &&
12321 			    !reg2btf_ids[base_type(reg->type)]) {
12322 				verbose(env, "%s is %s ", reg_arg_name(env, argno),
12323 					reg_type_str(env, reg->type));
12324 				verbose(env, "expected %s or socket\n",
12325 					reg_type_str(env, base_type(reg->type) |
12326 							  (type_flag(reg->type) & BPF_REG_TRUSTED_MODIFIERS)));
12327 				return -EINVAL;
12328 			}
12329 			ret = process_kf_arg_ptr_to_btf_id(env, reg, ref_t, ref_tname, ref_id, meta, i, argno);
12330 			if (ret < 0)
12331 				return ret;
12332 			break;
12333 		case KF_ARG_PTR_TO_MEM:
12334 			resolve_ret = btf_resolve_size(btf, ref_t, &type_size);
12335 			if (IS_ERR(resolve_ret)) {
12336 				verbose(env, "%s reference type('%s %s') size cannot be determined: %ld\n",
12337 					reg_arg_name(env, argno), btf_type_str(ref_t),
12338 					ref_tname, PTR_ERR(resolve_ret));
12339 				return -EINVAL;
12340 			}
12341 			ret = check_mem_reg(env, reg, argno, type_size);
12342 			if (ret < 0)
12343 				return ret;
12344 			break;
12345 		case KF_ARG_PTR_TO_MEM_SIZE:
12346 		{
12347 			struct bpf_reg_state *buff_reg = reg;
12348 			const struct btf_param *buff_arg = &args[i];
12349 			struct bpf_reg_state *size_reg = get_func_arg_reg(caller, regs, i + 1);
12350 			const struct btf_param *size_arg = &args[i + 1];
12351 			argno_t next_argno = argno_from_arg(i + 2);
12352 
12353 			if (!bpf_register_is_null(buff_reg) || !is_kfunc_arg_nullable(meta->btf, buff_arg)) {
12354 				ret = check_kfunc_mem_size_reg(env, buff_reg, size_reg,
12355 							       argno, next_argno);
12356 				if (ret < 0) {
12357 					verbose(env, "%s and ", reg_arg_name(env, argno));
12358 					verbose(env, "%s memory, len pair leads to invalid memory access\n",
12359 						reg_arg_name(env, next_argno));
12360 					return ret;
12361 				}
12362 			}
12363 
12364 			if (is_kfunc_arg_const_mem_size(meta->btf, size_arg, size_reg)) {
12365 				if (meta->arg_constant.found) {
12366 					verifier_bug(env, "only one constant argument permitted");
12367 					return -EFAULT;
12368 				}
12369 				if (!tnum_is_const(size_reg->var_off)) {
12370 					verbose(env, "%s must be a known constant\n",
12371 						reg_arg_name(env, next_argno));
12372 					return -EINVAL;
12373 				}
12374 				meta->arg_constant.found = true;
12375 				meta->arg_constant.value = size_reg->var_off.value;
12376 			}
12377 
12378 			/* Skip next '__sz' or '__szk' argument */
12379 			i++;
12380 			break;
12381 		}
12382 		case KF_ARG_PTR_TO_CALLBACK:
12383 			if (reg->type != PTR_TO_FUNC) {
12384 				verbose(env, "%s expected pointer to func\n", reg_arg_name(env, argno));
12385 				return -EINVAL;
12386 			}
12387 			meta->subprogno = reg->subprogno;
12388 			break;
12389 		case KF_ARG_PTR_TO_REFCOUNTED_KPTR:
12390 			if (!type_is_ptr_alloc_obj(reg->type)) {
12391 				verbose(env, "%s is neither owning or non-owning ref\n",
12392 					reg_arg_name(env, argno));
12393 				return -EINVAL;
12394 			}
12395 			if (!type_is_non_owning_ref(reg->type))
12396 				meta->arg_owning_ref = true;
12397 
12398 			rec = reg_btf_record(reg);
12399 			if (!rec) {
12400 				verifier_bug(env, "Couldn't find btf_record");
12401 				return -EFAULT;
12402 			}
12403 
12404 			if (rec->refcount_off < 0) {
12405 				verbose(env, "%s doesn't point to a type with bpf_refcount field\n",
12406 					reg_arg_name(env, argno));
12407 				return -EINVAL;
12408 			}
12409 
12410 			meta->arg_btf = reg->btf;
12411 			meta->arg_btf_id = reg->btf_id;
12412 			break;
12413 		case KF_ARG_PTR_TO_CONST_STR:
12414 			if (reg->type != PTR_TO_MAP_VALUE) {
12415 				verbose(env, "%s doesn't point to a const string\n",
12416 					reg_arg_name(env, argno));
12417 				return -EINVAL;
12418 			}
12419 			ret = check_arg_const_str(env, reg, argno);
12420 			if (ret)
12421 				return ret;
12422 			break;
12423 		case KF_ARG_PTR_TO_WORKQUEUE:
12424 			if (reg->type != PTR_TO_MAP_VALUE) {
12425 				verbose(env, "%s doesn't point to a map value\n",
12426 					reg_arg_name(env, argno));
12427 				return -EINVAL;
12428 			}
12429 			ret = check_map_field_pointer(env, reg, argno, BPF_WORKQUEUE, &meta->map);
12430 			if (ret < 0)
12431 				return ret;
12432 			break;
12433 		case KF_ARG_PTR_TO_TIMER:
12434 			if (reg->type != PTR_TO_MAP_VALUE) {
12435 				verbose(env, "%s doesn't point to a map value\n",
12436 					reg_arg_name(env, argno));
12437 				return -EINVAL;
12438 			}
12439 			ret = process_timer_kfunc(env, reg, argno, meta);
12440 			if (ret < 0)
12441 				return ret;
12442 			break;
12443 		case KF_ARG_PTR_TO_TASK_WORK:
12444 			if (reg->type != PTR_TO_MAP_VALUE) {
12445 				verbose(env, "%s doesn't point to a map value\n",
12446 					reg_arg_name(env, argno));
12447 				return -EINVAL;
12448 			}
12449 			ret = check_map_field_pointer(env, reg, argno, BPF_TASK_WORK, &meta->map);
12450 			if (ret < 0)
12451 				return ret;
12452 			break;
12453 		case KF_ARG_PTR_TO_IRQ_FLAG:
12454 			if (reg->type != PTR_TO_STACK) {
12455 				verbose(env, "%s doesn't point to an irq flag on stack\n",
12456 					reg_arg_name(env, argno));
12457 				return -EINVAL;
12458 			}
12459 			ret = process_irq_flag(env, reg, argno, meta);
12460 			if (ret < 0)
12461 				return ret;
12462 			break;
12463 		case KF_ARG_PTR_TO_RES_SPIN_LOCK:
12464 		{
12465 			int flags = PROCESS_RES_LOCK;
12466 
12467 			if (reg->type != PTR_TO_MAP_VALUE && reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
12468 				verbose(env, "%s doesn't point to map value or allocated object\n",
12469 					reg_arg_name(env, argno));
12470 				return -EINVAL;
12471 			}
12472 
12473 			if (!is_bpf_res_spin_lock_kfunc(meta->func_id))
12474 				return -EFAULT;
12475 			if (meta->func_id == special_kfunc_list[KF_bpf_res_spin_lock] ||
12476 			    meta->func_id == special_kfunc_list[KF_bpf_res_spin_lock_irqsave])
12477 				flags |= PROCESS_SPIN_LOCK;
12478 			if (meta->func_id == special_kfunc_list[KF_bpf_res_spin_lock_irqsave] ||
12479 			    meta->func_id == special_kfunc_list[KF_bpf_res_spin_unlock_irqrestore])
12480 				flags |= PROCESS_LOCK_IRQ;
12481 			ret = process_spin_lock(env, reg, argno, flags);
12482 			if (ret < 0)
12483 				return ret;
12484 			break;
12485 		}
12486 		}
12487 	}
12488 
12489 	return 0;
12490 }
12491 
12492 int bpf_fetch_kfunc_arg_meta(struct bpf_verifier_env *env,
12493 			     s32 func_id,
12494 			     s16 offset,
12495 			     struct bpf_kfunc_call_arg_meta *meta)
12496 {
12497 	struct bpf_kfunc_meta kfunc;
12498 	int err;
12499 
12500 	err = fetch_kfunc_meta(env, func_id, offset, &kfunc);
12501 	if (err)
12502 		return err;
12503 
12504 	memset(meta, 0, sizeof(*meta));
12505 	meta->btf = kfunc.btf;
12506 	meta->func_id = kfunc.id;
12507 	meta->func_proto = kfunc.proto;
12508 	meta->func_name = kfunc.name;
12509 
12510 	if (!kfunc.flags || !btf_kfunc_is_allowed(kfunc.btf, kfunc.id, env->prog))
12511 		return -EACCES;
12512 
12513 	meta->kfunc_flags = *kfunc.flags;
12514 
12515 	/* Only support release referenced argument passed by register */
12516 	if (is_kfunc_release(meta))
12517 		meta->release_regno = BPF_REG_1;
12518 
12519 	return 0;
12520 }
12521 
12522 /*
12523  * Determine how many bytes a helper accesses through a stack pointer at
12524  * argument position @arg (0-based, corresponding to R1-R5).
12525  *
12526  * Returns:
12527  *   > 0   known read access size in bytes
12528  *     0   doesn't read anything directly
12529  * S64_MIN unknown
12530  *   < 0   known write access of (-return) bytes
12531  */
12532 s64 bpf_helper_stack_access_bytes(struct bpf_verifier_env *env, struct bpf_insn *insn,
12533 				  int arg, int insn_idx)
12534 {
12535 	struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
12536 	const struct bpf_func_proto *fn;
12537 	enum bpf_arg_type at;
12538 	s64 size;
12539 
12540 	if (bpf_get_helper_proto(env, insn->imm, &fn) < 0)
12541 		return S64_MIN;
12542 
12543 	at = fn->arg_type[arg];
12544 
12545 	switch (base_type(at)) {
12546 	case ARG_PTR_TO_MAP_KEY:
12547 	case ARG_PTR_TO_MAP_VALUE: {
12548 		bool is_key = base_type(at) == ARG_PTR_TO_MAP_KEY;
12549 		u64 val;
12550 		int i, map_reg;
12551 
12552 		for (i = 0; i < arg; i++) {
12553 			if (base_type(fn->arg_type[i]) == ARG_CONST_MAP_PTR)
12554 				break;
12555 		}
12556 		if (i >= arg)
12557 			goto scan_all_maps;
12558 
12559 		map_reg = BPF_REG_1 + i;
12560 
12561 		if (!(aux->const_reg_map_mask & BIT(map_reg)))
12562 			goto scan_all_maps;
12563 
12564 		i = aux->const_reg_vals[map_reg];
12565 		if (i < env->used_map_cnt) {
12566 			size = is_key ? env->used_maps[i]->key_size
12567 				      : env->used_maps[i]->value_size;
12568 			goto out;
12569 		}
12570 scan_all_maps:
12571 		/*
12572 		 * Map pointer is not known at this call site (e.g. different
12573 		 * maps on merged paths).  Conservatively return the largest
12574 		 * key_size or value_size across all maps used by the program.
12575 		 */
12576 		val = 0;
12577 		for (i = 0; i < env->used_map_cnt; i++) {
12578 			struct bpf_map *map = env->used_maps[i];
12579 			u32 sz = is_key ? map->key_size : map->value_size;
12580 
12581 			if (sz > val)
12582 				val = sz;
12583 			if (map->inner_map_meta) {
12584 				sz = is_key ? map->inner_map_meta->key_size
12585 					    : map->inner_map_meta->value_size;
12586 				if (sz > val)
12587 					val = sz;
12588 			}
12589 		}
12590 		if (!val)
12591 			return S64_MIN;
12592 		size = val;
12593 		goto out;
12594 	}
12595 	case ARG_PTR_TO_MEM:
12596 		if (at & MEM_FIXED_SIZE) {
12597 			size = fn->arg_size[arg];
12598 			goto out;
12599 		}
12600 		if (arg + 1 < ARRAY_SIZE(fn->arg_type) &&
12601 		    arg_type_is_mem_size(fn->arg_type[arg + 1])) {
12602 			int size_reg = BPF_REG_1 + arg + 1;
12603 
12604 			if (aux->const_reg_mask & BIT(size_reg)) {
12605 				size = (s64)aux->const_reg_vals[size_reg];
12606 				goto out;
12607 			}
12608 			/*
12609 			 * Size arg is const on each path but differs across merged
12610 			 * paths. MAX_BPF_STACK is a safe upper bound for reads.
12611 			 */
12612 			if (at & MEM_UNINIT)
12613 				return 0;
12614 			return MAX_BPF_STACK;
12615 		}
12616 		return S64_MIN;
12617 	case ARG_PTR_TO_DYNPTR:
12618 		size = BPF_DYNPTR_SIZE;
12619 		break;
12620 	case ARG_PTR_TO_STACK:
12621 		/*
12622 		 * Only used by bpf_calls_callback() helpers. The helper itself
12623 		 * doesn't access stack. The callback subprog does and it's
12624 		 * analyzed separately.
12625 		 */
12626 		return 0;
12627 	default:
12628 		return S64_MIN;
12629 	}
12630 out:
12631 	/*
12632 	 * MEM_UNINIT args are write-only: the helper initializes the
12633 	 * buffer without reading it.
12634 	 */
12635 	if (at & MEM_UNINIT)
12636 		return -size;
12637 	return size;
12638 }
12639 
12640 /*
12641  * Determine how many bytes a kfunc accesses through a stack pointer at
12642  * argument position @arg (0-based, corresponding to R1-R5).
12643  *
12644  * Returns:
12645  *   > 0      known read access size in bytes
12646  *     0      doesn't access memory through that argument (ex: not a pointer)
12647  *   S64_MIN  unknown
12648  *   < 0      known write access of (-return) bytes
12649  */
12650 s64 bpf_kfunc_stack_access_bytes(struct bpf_verifier_env *env, struct bpf_insn *insn,
12651 				 int arg, int insn_idx)
12652 {
12653 	struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
12654 	struct bpf_kfunc_call_arg_meta meta;
12655 	const struct btf_param *args;
12656 	const struct btf_type *t, *ref_t;
12657 	const struct btf *btf;
12658 	u32 nargs, type_size;
12659 	s64 size;
12660 
12661 	if (bpf_fetch_kfunc_arg_meta(env, insn->imm, insn->off, &meta) < 0)
12662 		return S64_MIN;
12663 
12664 	btf = meta.btf;
12665 	args = btf_params(meta.func_proto);
12666 	nargs = btf_type_vlen(meta.func_proto);
12667 	if (arg >= nargs)
12668 		return 0;
12669 
12670 	t = btf_type_skip_modifiers(btf, args[arg].type, NULL);
12671 	if (!btf_type_is_ptr(t))
12672 		return 0;
12673 
12674 	/* dynptr: fixed 16-byte on-stack representation */
12675 	if (is_kfunc_arg_dynptr(btf, &args[arg])) {
12676 		size = BPF_DYNPTR_SIZE;
12677 		goto out;
12678 	}
12679 
12680 	/* ptr + __sz/__szk pair: size is in the next register */
12681 	if (arg + 1 < nargs &&
12682 	    (btf_param_match_suffix(btf, &args[arg + 1], "__sz") ||
12683 	     btf_param_match_suffix(btf, &args[arg + 1], "__szk"))) {
12684 		int size_reg = BPF_REG_1 + arg + 1;
12685 
12686 		if (aux->const_reg_mask & BIT(size_reg)) {
12687 			size = (s64)aux->const_reg_vals[size_reg];
12688 			goto out;
12689 		}
12690 		return MAX_BPF_STACK;
12691 	}
12692 
12693 	/* fixed-size pointed-to type: resolve via BTF */
12694 	ref_t = btf_type_skip_modifiers(btf, t->type, NULL);
12695 	if (!IS_ERR(btf_resolve_size(btf, ref_t, &type_size))) {
12696 		size = type_size;
12697 		goto out;
12698 	}
12699 
12700 	return S64_MIN;
12701 out:
12702 	/* KF_ITER_NEW kfuncs initialize the iterator state at arg 0 */
12703 	if (arg == 0 && meta.kfunc_flags & KF_ITER_NEW)
12704 		return -size;
12705 	if (is_kfunc_arg_uninit(btf, &args[arg]))
12706 		return -size;
12707 	return size;
12708 }
12709 
12710 /* check special kfuncs and return:
12711  *  1  - not fall-through to 'else' branch, continue verification
12712  *  0  - fall-through to 'else' branch
12713  * < 0 - not fall-through to 'else' branch, return error
12714  */
12715 static int check_special_kfunc(struct bpf_verifier_env *env, struct bpf_kfunc_call_arg_meta *meta,
12716 			       struct bpf_reg_state *regs, struct bpf_insn_aux_data *insn_aux,
12717 			       const struct btf_type *ptr_type, struct btf *desc_btf)
12718 {
12719 	const struct btf_type *ret_t;
12720 	int err = 0;
12721 
12722 	if (meta->btf != btf_vmlinux)
12723 		return 0;
12724 
12725 	if (is_bpf_obj_new_kfunc(meta->func_id) || is_bpf_percpu_obj_new_kfunc(meta->func_id)) {
12726 		struct btf_struct_meta *struct_meta;
12727 		struct btf *ret_btf;
12728 		u32 ret_btf_id;
12729 
12730 		if (is_bpf_obj_new_kfunc(meta->func_id) && !bpf_global_ma_set)
12731 			return -ENOMEM;
12732 
12733 		if (((u64)(u32)meta->arg_constant.value) != meta->arg_constant.value) {
12734 			verbose(env, "local type ID argument must be in range [0, U32_MAX]\n");
12735 			return -EINVAL;
12736 		}
12737 
12738 		ret_btf = env->prog->aux->btf;
12739 		ret_btf_id = meta->arg_constant.value;
12740 
12741 		/* This may be NULL due to user not supplying a BTF */
12742 		if (!ret_btf) {
12743 			verbose(env, "bpf_obj_new/bpf_percpu_obj_new requires prog BTF\n");
12744 			return -EINVAL;
12745 		}
12746 
12747 		ret_t = btf_type_by_id(ret_btf, ret_btf_id);
12748 		if (!ret_t || !__btf_type_is_struct(ret_t)) {
12749 			verbose(env, "bpf_obj_new/bpf_percpu_obj_new type ID argument must be of a struct\n");
12750 			return -EINVAL;
12751 		}
12752 
12753 		if (is_bpf_percpu_obj_new_kfunc(meta->func_id)) {
12754 			if (ret_t->size > BPF_GLOBAL_PERCPU_MA_MAX_SIZE) {
12755 				verbose(env, "bpf_percpu_obj_new type size (%d) is greater than %d\n",
12756 					ret_t->size, BPF_GLOBAL_PERCPU_MA_MAX_SIZE);
12757 				return -EINVAL;
12758 			}
12759 
12760 			if (!bpf_global_percpu_ma_set) {
12761 				mutex_lock(&bpf_percpu_ma_lock);
12762 				if (!bpf_global_percpu_ma_set) {
12763 					/* Charge memory allocated with bpf_global_percpu_ma to
12764 					 * root memcg. The obj_cgroup for root memcg is NULL.
12765 					 */
12766 					err = bpf_mem_alloc_percpu_init(&bpf_global_percpu_ma, NULL);
12767 					if (!err)
12768 						bpf_global_percpu_ma_set = true;
12769 				}
12770 				mutex_unlock(&bpf_percpu_ma_lock);
12771 				if (err)
12772 					return err;
12773 			}
12774 
12775 			mutex_lock(&bpf_percpu_ma_lock);
12776 			err = bpf_mem_alloc_percpu_unit_init(&bpf_global_percpu_ma, ret_t->size);
12777 			mutex_unlock(&bpf_percpu_ma_lock);
12778 			if (err)
12779 				return err;
12780 		}
12781 
12782 		struct_meta = btf_find_struct_meta(ret_btf, ret_btf_id);
12783 		if (is_bpf_percpu_obj_new_kfunc(meta->func_id)) {
12784 			if (!__btf_type_is_scalar_struct(env, ret_btf, ret_t, 0)) {
12785 				verbose(env, "bpf_percpu_obj_new type ID argument must be of a struct of scalars\n");
12786 				return -EINVAL;
12787 			}
12788 
12789 			if (struct_meta) {
12790 				verbose(env, "bpf_percpu_obj_new type ID argument must not contain special fields\n");
12791 				return -EINVAL;
12792 			}
12793 		}
12794 
12795 		mark_reg_known_zero(env, regs, BPF_REG_0);
12796 		regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC;
12797 		regs[BPF_REG_0].btf = ret_btf;
12798 		regs[BPF_REG_0].btf_id = ret_btf_id;
12799 		if (is_bpf_percpu_obj_new_kfunc(meta->func_id))
12800 			regs[BPF_REG_0].type |= MEM_PERCPU;
12801 
12802 		insn_aux->obj_new_size = ret_t->size;
12803 		insn_aux->kptr_struct_meta = struct_meta;
12804 	} else if (is_bpf_refcount_acquire_kfunc(meta->func_id)) {
12805 		mark_reg_known_zero(env, regs, BPF_REG_0);
12806 		regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC;
12807 		regs[BPF_REG_0].btf = meta->arg_btf;
12808 		regs[BPF_REG_0].btf_id = meta->arg_btf_id;
12809 
12810 		insn_aux->kptr_struct_meta =
12811 			btf_find_struct_meta(meta->arg_btf,
12812 					     meta->arg_btf_id);
12813 	} else if (is_list_node_type(ptr_type)) {
12814 		struct btf_field *field = meta->arg_list_head.field;
12815 
12816 		mark_reg_graph_node(regs, BPF_REG_0, &field->graph_root);
12817 	} else if (is_rbtree_node_type(ptr_type)) {
12818 		struct btf_field *field = meta->arg_rbtree_root.field;
12819 
12820 		mark_reg_graph_node(regs, BPF_REG_0, &field->graph_root);
12821 	} else if (meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx]) {
12822 		mark_reg_known_zero(env, regs, BPF_REG_0);
12823 		regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_TRUSTED;
12824 		regs[BPF_REG_0].btf = desc_btf;
12825 		regs[BPF_REG_0].btf_id = meta->ret_btf_id;
12826 	} else if (meta->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
12827 		ret_t = btf_type_by_id(desc_btf, meta->arg_constant.value);
12828 		if (!ret_t) {
12829 			verbose(env, "Unknown type ID %lld passed to kfunc bpf_rdonly_cast\n",
12830 				meta->arg_constant.value);
12831 			return -EINVAL;
12832 		} else if (btf_type_is_struct(ret_t)) {
12833 			mark_reg_known_zero(env, regs, BPF_REG_0);
12834 			regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_UNTRUSTED;
12835 			regs[BPF_REG_0].btf = desc_btf;
12836 			regs[BPF_REG_0].btf_id = meta->arg_constant.value;
12837 		} else if (btf_type_is_void(ret_t)) {
12838 			mark_reg_known_zero(env, regs, BPF_REG_0);
12839 			regs[BPF_REG_0].type = PTR_TO_MEM | MEM_RDONLY | PTR_UNTRUSTED;
12840 			regs[BPF_REG_0].mem_size = 0;
12841 		} else {
12842 			verbose(env,
12843 				"kfunc bpf_rdonly_cast type ID argument must be of a struct or void\n");
12844 			return -EINVAL;
12845 		}
12846 	} else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_slice] ||
12847 		   meta->func_id == special_kfunc_list[KF_bpf_dynptr_slice_rdwr]) {
12848 		enum bpf_type_flag type_flag = get_dynptr_type_flag(meta->dynptr.type);
12849 
12850 		mark_reg_known_zero(env, regs, BPF_REG_0);
12851 
12852 		if (!meta->arg_constant.found) {
12853 			verifier_bug(env, "bpf_dynptr_slice(_rdwr) no constant size");
12854 			return -EFAULT;
12855 		}
12856 
12857 		regs[BPF_REG_0].mem_size = meta->arg_constant.value;
12858 
12859 		/* PTR_MAYBE_NULL will be added when is_kfunc_ret_null is checked */
12860 		regs[BPF_REG_0].type = PTR_TO_MEM | type_flag;
12861 
12862 		if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_slice]) {
12863 			regs[BPF_REG_0].type |= MEM_RDONLY;
12864 		} else {
12865 			/* this will set env->seen_direct_write to true */
12866 			if (!may_access_direct_pkt_data(env, NULL, BPF_WRITE)) {
12867 				verbose(env, "the prog does not allow writes to packet data\n");
12868 				return -EINVAL;
12869 			}
12870 		}
12871 
12872 		if (!meta->dynptr.id) {
12873 			verifier_bug(env, "no dynptr id");
12874 			return -EFAULT;
12875 		}
12876 		regs[BPF_REG_0].parent_id = meta->dynptr.id;
12877 	} else {
12878 		return 0;
12879 	}
12880 
12881 	return 1;
12882 }
12883 
12884 static int check_return_code(struct bpf_verifier_env *env, int regno, const char *reg_name);
12885 
12886 static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
12887 			    int *insn_idx_p)
12888 {
12889 	bool sleepable, rcu_lock, rcu_unlock, preempt_disable, preempt_enable;
12890 	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
12891 	struct bpf_reg_state *regs = cur_regs(env);
12892 	const char *func_name, *ptr_type_name;
12893 	const struct btf_type *t, *ptr_type;
12894 	struct bpf_kfunc_call_arg_meta meta;
12895 	struct bpf_insn_aux_data *insn_aux;
12896 	int err, insn_idx = *insn_idx_p;
12897 	const struct btf_param *args;
12898 	u32 i, nargs, ptr_type_id;
12899 	struct btf *desc_btf;
12900 	int id;
12901 
12902 	/* skip for now, but return error when we find this in fixup_kfunc_call */
12903 	if (!insn->imm)
12904 		return 0;
12905 
12906 	err = bpf_fetch_kfunc_arg_meta(env, insn->imm, insn->off, &meta);
12907 	if (err == -EACCES && meta.func_name)
12908 		verbose(env, "calling kernel function %s is not allowed\n", meta.func_name);
12909 	if (err)
12910 		return err;
12911 	desc_btf = meta.btf;
12912 	func_name = meta.func_name;
12913 	insn_aux = &env->insn_aux_data[insn_idx];
12914 
12915 	insn_aux->is_iter_next = bpf_is_iter_next_kfunc(&meta);
12916 
12917 	if (!insn->off &&
12918 	    (insn->imm == special_kfunc_list[KF_bpf_res_spin_lock] ||
12919 	     insn->imm == special_kfunc_list[KF_bpf_res_spin_lock_irqsave])) {
12920 		struct bpf_verifier_state *branch;
12921 		struct bpf_reg_state *regs;
12922 
12923 		branch = push_stack(env, env->insn_idx + 1, env->insn_idx, false);
12924 		if (IS_ERR(branch)) {
12925 			verbose(env, "failed to push state for failed lock acquisition\n");
12926 			return PTR_ERR(branch);
12927 		}
12928 
12929 		regs = branch->frame[branch->curframe]->regs;
12930 
12931 		/* Clear r0-r5 registers in forked state */
12932 		for (i = 0; i < CALLER_SAVED_REGS; i++)
12933 			bpf_mark_reg_not_init(env, &regs[caller_saved[i]]);
12934 
12935 		mark_reg_unknown(env, regs, BPF_REG_0);
12936 		err = __mark_reg_s32_range(env, regs, BPF_REG_0, -MAX_ERRNO, -1);
12937 		if (err) {
12938 			verbose(env, "failed to mark s32 range for retval in forked state for lock\n");
12939 			return err;
12940 		}
12941 		__mark_btf_func_reg_size(env, regs, BPF_REG_0, sizeof(u32));
12942 	} else if (!insn->off && insn->imm == special_kfunc_list[KF___bpf_trap]) {
12943 		verbose(env, "unexpected __bpf_trap() due to uninitialized variable?\n");
12944 		return -EFAULT;
12945 	}
12946 
12947 	if (is_kfunc_destructive(&meta) && !capable(CAP_SYS_BOOT)) {
12948 		verbose(env, "destructive kfunc calls require CAP_SYS_BOOT capability\n");
12949 		return -EACCES;
12950 	}
12951 
12952 	sleepable = bpf_is_kfunc_sleepable(&meta);
12953 	if (sleepable && !in_sleepable(env)) {
12954 		verbose(env, "program must be sleepable to call sleepable kfunc %s\n", func_name);
12955 		return -EACCES;
12956 	}
12957 
12958 	/* Track non-sleepable context for kfuncs, same as for helpers. */
12959 	if (!in_sleepable_context(env))
12960 		insn_aux->non_sleepable = true;
12961 
12962 	/* Check the arguments */
12963 	err = check_kfunc_args(env, &meta, insn_idx);
12964 	if (err < 0)
12965 		return err;
12966 
12967 	if ((is_bpf_obj_drop_kfunc(meta.func_id) ||
12968 	     is_bpf_percpu_obj_drop_kfunc(meta.func_id)) && (is_tracing_prog_type(prog_type) ||
12969 	     /* is_tracing_prog_type() for now doesn't cover non-iterator tracing progs. */
12970 	     (prog_type == BPF_PROG_TYPE_TRACING && env->prog->expected_attach_type != BPF_TRACE_ITER
12971 	      && !env->prog->sleepable))) {
12972 		struct btf_struct_meta *struct_meta;
12973 
12974 		struct_meta = btf_find_struct_meta(meta.arg_btf, meta.arg_btf_id);
12975 		if (struct_meta && btf_record_has_nmi_unsafe_fields(struct_meta->record)) {
12976 			verbose(env, "%s cannot be used in tracing programs on types with NMI unsafe fields\n",
12977 				func_name);
12978 			return -EINVAL;
12979 		}
12980 	}
12981 
12982 	if (is_bpf_rbtree_add_kfunc(meta.func_id)) {
12983 		err = push_callback_call(env, insn, insn_idx, meta.subprogno,
12984 					 set_rbtree_add_callback_state);
12985 		if (err) {
12986 			verbose(env, "kfunc %s#%d failed callback verification\n",
12987 				func_name, meta.func_id);
12988 			return err;
12989 		}
12990 	}
12991 
12992 	if (meta.func_id == special_kfunc_list[KF_bpf_session_cookie]) {
12993 		meta.r0_size = sizeof(u64);
12994 		meta.r0_rdonly = false;
12995 	}
12996 
12997 	if (is_bpf_wq_set_callback_kfunc(meta.func_id)) {
12998 		err = push_callback_call(env, insn, insn_idx, meta.subprogno,
12999 					 set_timer_callback_state);
13000 		if (err) {
13001 			verbose(env, "kfunc %s#%d failed callback verification\n",
13002 				func_name, meta.func_id);
13003 			return err;
13004 		}
13005 	}
13006 
13007 	if (is_task_work_add_kfunc(meta.func_id)) {
13008 		err = push_callback_call(env, insn, insn_idx, meta.subprogno,
13009 					 set_task_work_schedule_callback_state);
13010 		if (err) {
13011 			verbose(env, "kfunc %s#%d failed callback verification\n",
13012 				func_name, meta.func_id);
13013 			return err;
13014 		}
13015 	}
13016 
13017 	rcu_lock = is_kfunc_bpf_rcu_read_lock(&meta);
13018 	rcu_unlock = is_kfunc_bpf_rcu_read_unlock(&meta);
13019 
13020 	preempt_disable = is_kfunc_bpf_preempt_disable(&meta);
13021 	preempt_enable = is_kfunc_bpf_preempt_enable(&meta);
13022 
13023 	if (rcu_lock) {
13024 		env->cur_state->active_rcu_locks++;
13025 	} else if (rcu_unlock) {
13026 		if (env->cur_state->active_rcu_locks == 0) {
13027 			verbose(env, "unmatched rcu read unlock (kernel function %s)\n", func_name);
13028 			return -EINVAL;
13029 		}
13030 		if (--env->cur_state->active_rcu_locks == 0)
13031 			invalidate_rcu_protected_refs(env);
13032 	} else if (preempt_disable) {
13033 		env->cur_state->active_preempt_locks++;
13034 	} else if (preempt_enable) {
13035 		if (env->cur_state->active_preempt_locks == 0) {
13036 			verbose(env, "unmatched attempt to enable preemption (kernel function %s)\n", func_name);
13037 			return -EINVAL;
13038 		}
13039 		env->cur_state->active_preempt_locks--;
13040 	}
13041 
13042 	if (sleepable && !in_sleepable_context(env)) {
13043 		verbose(env, "kernel func %s is sleepable within %s\n",
13044 			func_name, non_sleepable_context_description(env));
13045 		return -EACCES;
13046 	}
13047 
13048 	if (in_rbtree_lock_required_cb(env) && (rcu_lock || rcu_unlock)) {
13049 		verbose(env, "Calling bpf_rcu_read_{lock,unlock} in unnecessary rbtree callback\n");
13050 		return -EACCES;
13051 	}
13052 
13053 	if (is_kfunc_rcu_protected(&meta) && !in_rcu_cs(env)) {
13054 		verbose(env, "kernel func %s requires RCU critical section protection\n", func_name);
13055 		return -EACCES;
13056 	}
13057 
13058 	/* In case of release function, we get register number of refcounted
13059 	 * PTR_TO_BTF_ID in bpf_kfunc_arg_meta, do the release now.
13060 	 */
13061 	if (meta.release_regno) {
13062 		err = release_reg(env, &regs[meta.release_regno], false, !!meta.dynptr.id);
13063 		if (err)
13064 			return err;
13065 	}
13066 
13067 	if (is_bpf_list_push_kfunc(meta.func_id) || is_bpf_rbtree_add_kfunc(meta.func_id)) {
13068 		id = regs[BPF_REG_2].id;
13069 		insn_aux->insert_off = regs[BPF_REG_2].var_off.value;
13070 		insn_aux->kptr_struct_meta = btf_find_struct_meta(meta.arg_btf, meta.arg_btf_id);
13071 		ref_convert_owning_non_owning(env, id);
13072 	}
13073 
13074 	if (meta.func_id == special_kfunc_list[KF_bpf_throw]) {
13075 		if (!bpf_jit_supports_exceptions()) {
13076 			verbose(env, "JIT does not support calling kfunc %s#%d\n",
13077 				func_name, meta.func_id);
13078 			return -ENOTSUPP;
13079 		}
13080 		env->seen_exception = true;
13081 
13082 		/* In the case of the default callback, the cookie value passed
13083 		 * to bpf_throw becomes the return value of the program.
13084 		 */
13085 		if (!env->exception_callback_subprog) {
13086 			err = check_return_code(env, BPF_REG_1, "R1");
13087 			if (err < 0)
13088 				return err;
13089 		}
13090 	}
13091 
13092 	for (i = 0; i < CALLER_SAVED_REGS; i++) {
13093 		u32 regno = caller_saved[i];
13094 
13095 		bpf_mark_reg_not_init(env, &regs[regno]);
13096 		regs[regno].subreg_def = DEF_NOT_SUBREG;
13097 	}
13098 	invalidate_outgoing_stack_args(env, cur_func(env));
13099 
13100 	/* Check return type */
13101 	t = btf_type_skip_modifiers(desc_btf, meta.func_proto->type, NULL);
13102 
13103 	if (is_kfunc_acquire(&meta) && !btf_type_is_struct_ptr(meta.btf, t)) {
13104 		if (meta.btf != btf_vmlinux ||
13105 		    (!is_bpf_obj_new_kfunc(meta.func_id) &&
13106 		     !is_bpf_percpu_obj_new_kfunc(meta.func_id) &&
13107 		     !is_bpf_refcount_acquire_kfunc(meta.func_id))) {
13108 			verbose(env, "acquire kernel function does not return PTR_TO_BTF_ID\n");
13109 			return -EINVAL;
13110 		}
13111 	}
13112 
13113 	if (btf_type_is_scalar(t)) {
13114 		mark_reg_unknown(env, regs, BPF_REG_0);
13115 		if (meta.btf == btf_vmlinux && (meta.func_id == special_kfunc_list[KF_bpf_res_spin_lock] ||
13116 		    meta.func_id == special_kfunc_list[KF_bpf_res_spin_lock_irqsave]))
13117 			__mark_reg_const_zero(env, &regs[BPF_REG_0]);
13118 		mark_btf_func_reg_size(env, BPF_REG_0, t->size);
13119 	} else if (btf_type_is_ptr(t)) {
13120 		ptr_type = btf_type_skip_modifiers(desc_btf, t->type, &ptr_type_id);
13121 		err = check_special_kfunc(env, &meta, regs, insn_aux, ptr_type, desc_btf);
13122 		if (err) {
13123 			if (err < 0)
13124 				return err;
13125 		} else if (btf_type_is_void(ptr_type)) {
13126 			/* kfunc returning 'void *' is equivalent to returning scalar */
13127 			mark_reg_unknown(env, regs, BPF_REG_0);
13128 		} else if (!__btf_type_is_struct(ptr_type)) {
13129 			if (!meta.r0_size) {
13130 				__u32 sz;
13131 
13132 				if (!IS_ERR(btf_resolve_size(desc_btf, ptr_type, &sz))) {
13133 					meta.r0_size = sz;
13134 					meta.r0_rdonly = true;
13135 				}
13136 			}
13137 			if (!meta.r0_size) {
13138 				ptr_type_name = btf_name_by_offset(desc_btf,
13139 								   ptr_type->name_off);
13140 				verbose(env,
13141 					"kernel function %s returns pointer type %s %s is not supported\n",
13142 					func_name,
13143 					btf_type_str(ptr_type),
13144 					ptr_type_name);
13145 				return -EINVAL;
13146 			}
13147 
13148 			mark_reg_known_zero(env, regs, BPF_REG_0);
13149 			regs[BPF_REG_0].type = PTR_TO_MEM;
13150 			regs[BPF_REG_0].mem_size = meta.r0_size;
13151 
13152 			if (meta.r0_rdonly)
13153 				regs[BPF_REG_0].type |= MEM_RDONLY;
13154 
13155 			/* Ensures we don't access the memory after a release_reference() */
13156 			if (meta.ref_obj.id) {
13157 				err = validate_ref_obj(env, &meta.ref_obj);
13158 				if (err)
13159 					return err;
13160 				regs[BPF_REG_0].parent_id = meta.ref_obj.id;
13161 			}
13162 
13163 			if (is_kfunc_rcu_protected(&meta))
13164 				regs[BPF_REG_0].type |= MEM_RCU;
13165 		} else {
13166 			enum bpf_reg_type type = PTR_TO_BTF_ID;
13167 
13168 			if (meta.func_id == special_kfunc_list[KF_bpf_get_kmem_cache])
13169 				type |= PTR_UNTRUSTED;
13170 			else if (is_kfunc_rcu_protected(&meta) ||
13171 				 (bpf_is_iter_next_kfunc(&meta) &&
13172 				  (get_iter_from_state(env->cur_state, &meta)
13173 					   ->type & MEM_RCU))) {
13174 				/*
13175 				 * If the iterator's constructor (the _new
13176 				 * function e.g., bpf_iter_task_new) has been
13177 				 * annotated with BPF kfunc flag
13178 				 * KF_RCU_PROTECTED and was called within a RCU
13179 				 * read-side critical section, also propagate
13180 				 * the MEM_RCU flag to the pointer returned from
13181 				 * the iterator's next function (e.g.,
13182 				 * bpf_iter_task_next).
13183 				 */
13184 				type |= MEM_RCU;
13185 			} else {
13186 				/*
13187 				 * Any PTR_TO_BTF_ID that is returned from a BPF
13188 				 * kfunc should by default be treated as
13189 				 * implicitly trusted.
13190 				 */
13191 				type |= PTR_TRUSTED;
13192 			}
13193 
13194 			mark_reg_known_zero(env, regs, BPF_REG_0);
13195 			regs[BPF_REG_0].btf = desc_btf;
13196 			regs[BPF_REG_0].type = type;
13197 			regs[BPF_REG_0].btf_id = ptr_type_id;
13198 		}
13199 
13200 		if (is_kfunc_ret_null(&meta)) {
13201 			regs[BPF_REG_0].type |= PTR_MAYBE_NULL;
13202 			/* For mark_ptr_or_null_reg, see 93c230e3f5bd6 */
13203 			regs[BPF_REG_0].id = ++env->id_gen;
13204 		}
13205 		mark_btf_func_reg_size(env, BPF_REG_0, sizeof(void *));
13206 		if (is_kfunc_acquire(&meta)) {
13207 			id = acquire_reference(env, insn_idx, 0);
13208 			if (id < 0)
13209 				return id;
13210 			regs[BPF_REG_0].id = id;
13211 		} else if (is_rbtree_node_type(ptr_type) || is_list_node_type(ptr_type)) {
13212 			ref_set_non_owning(env, &regs[BPF_REG_0]);
13213 		}
13214 
13215 		if (reg_may_point_to_spin_lock(&regs[BPF_REG_0]) && !regs[BPF_REG_0].id)
13216 			regs[BPF_REG_0].id = ++env->id_gen;
13217 	} else if (btf_type_is_void(t)) {
13218 		if (meta.btf == btf_vmlinux) {
13219 			if (is_bpf_obj_drop_kfunc(meta.func_id) ||
13220 			    is_bpf_percpu_obj_drop_kfunc(meta.func_id)) {
13221 				insn_aux->kptr_struct_meta =
13222 					btf_find_struct_meta(meta.arg_btf,
13223 							     meta.arg_btf_id);
13224 			}
13225 		}
13226 	}
13227 
13228 	if (bpf_is_kfunc_pkt_changing(&meta))
13229 		clear_all_pkt_pointers(env);
13230 
13231 	nargs = btf_type_vlen(meta.func_proto);
13232 	if (nargs > MAX_BPF_FUNC_REG_ARGS) {
13233 		struct bpf_func_state *caller = cur_func(env);
13234 		struct bpf_subprog_info *caller_info = &env->subprog_info[caller->subprogno];
13235 		u16 out_stack_arg_cnt = nargs - MAX_BPF_FUNC_REG_ARGS;
13236 		u16 stack_arg_cnt = bpf_in_stack_arg_cnt(caller_info) + out_stack_arg_cnt;
13237 
13238 		if (stack_arg_cnt > caller_info->stack_arg_cnt)
13239 			caller_info->stack_arg_cnt = stack_arg_cnt;
13240 	}
13241 
13242 	args = (const struct btf_param *)(meta.func_proto + 1);
13243 	for (i = 0; i < min_t(int, nargs, MAX_BPF_FUNC_REG_ARGS); i++) {
13244 		u32 regno = i + 1;
13245 
13246 		t = btf_type_skip_modifiers(desc_btf, args[i].type, NULL);
13247 		if (btf_type_is_ptr(t))
13248 			mark_btf_func_reg_size(env, regno, sizeof(void *));
13249 		else
13250 			/* scalar. ensured by check_kfunc_args() */
13251 			mark_btf_func_reg_size(env, regno, t->size);
13252 	}
13253 
13254 	if (bpf_is_iter_next_kfunc(&meta)) {
13255 		err = process_iter_next_call(env, insn_idx, &meta);
13256 		if (err)
13257 			return err;
13258 	}
13259 
13260 	if (meta.func_id == special_kfunc_list[KF_bpf_session_cookie])
13261 		env->prog->call_session_cookie = true;
13262 
13263 	if (bpf_is_throw_kfunc(insn))
13264 		return process_bpf_exit_full(env, NULL, true);
13265 
13266 	return 0;
13267 }
13268 
13269 static bool check_reg_sane_offset_scalar(struct bpf_verifier_env *env,
13270 					 const struct bpf_reg_state *reg,
13271 					 enum bpf_reg_type type)
13272 {
13273 	bool known = tnum_is_const(reg->var_off);
13274 	s64 val = reg->var_off.value;
13275 	s64 smin = reg_smin(reg);
13276 
13277 	if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
13278 		verbose(env, "math between %s pointer and %lld is not allowed\n",
13279 			reg_type_str(env, type), val);
13280 		return false;
13281 	}
13282 
13283 	if (smin == S64_MIN) {
13284 		verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n",
13285 			reg_type_str(env, type));
13286 		return false;
13287 	}
13288 
13289 	if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
13290 		verbose(env, "value %lld makes %s pointer be out of bounds\n",
13291 			smin, reg_type_str(env, type));
13292 		return false;
13293 	}
13294 
13295 	return true;
13296 }
13297 
13298 static bool check_reg_sane_offset_ptr(struct bpf_verifier_env *env,
13299 				      const struct bpf_reg_state *reg,
13300 				      enum bpf_reg_type type)
13301 {
13302 	bool known = tnum_is_const(reg->var_off);
13303 	s64 val = reg->var_off.value;
13304 	s64 smin = reg_smin(reg);
13305 
13306 	if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
13307 		verbose(env, "%s pointer offset %lld is not allowed\n",
13308 			reg_type_str(env, type), val);
13309 		return false;
13310 	}
13311 
13312 	if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
13313 		verbose(env, "%s pointer offset %lld is not allowed\n",
13314 			reg_type_str(env, type), smin);
13315 		return false;
13316 	}
13317 
13318 	return true;
13319 }
13320 
/* Failure reasons returned by the pointer ALU sanitation helpers below.
 * All values are negative; sanitize_err() turns them into a log message
 * and an errno.
 */
enum {
	/* unknown scalar offset with mixed signed bounds */
	REASON_BOUNDS	= -1,
	/* pointer type not supported by retrieve_ptr_limit() */
	REASON_TYPE	= -2,
	/* conflicting alu_state/alu_limit recorded for the same insn */
	REASON_PATHS	= -3,
	/* computed pointer limit is not below the allowed maximum */
	REASON_LIMIT	= -4,
	/* speculative path could not be pushed */
	REASON_STACK	= -5,
};
13328 
13329 static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
13330 			      u32 *alu_limit, bool mask_to_left)
13331 {
13332 	u32 max = 0, ptr_limit = 0;
13333 
13334 	switch (ptr_reg->type) {
13335 	case PTR_TO_STACK:
13336 		/* Offset 0 is out-of-bounds, but acceptable start for the
13337 		 * left direction, see BPF_REG_FP. Also, unknown scalar
13338 		 * offset where we would need to deal with min/max bounds is
13339 		 * currently prohibited for unprivileged.
13340 		 */
13341 		max = MAX_BPF_STACK + mask_to_left;
13342 		ptr_limit = -ptr_reg->var_off.value;
13343 		break;
13344 	case PTR_TO_MAP_VALUE:
13345 		max = ptr_reg->map_ptr->value_size;
13346 		ptr_limit = mask_to_left ? reg_smin(ptr_reg) : reg_umax(ptr_reg);
13347 		break;
13348 	default:
13349 		return REASON_TYPE;
13350 	}
13351 
13352 	if (ptr_limit >= max)
13353 		return REASON_LIMIT;
13354 	*alu_limit = ptr_limit;
13355 	return 0;
13356 }
13357 
13358 static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env,
13359 				    const struct bpf_insn *insn)
13360 {
13361 	return env->bypass_spec_v1 ||
13362 		BPF_SRC(insn->code) == BPF_K ||
13363 		cur_aux(env)->nospec;
13364 }
13365 
13366 static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux,
13367 				       u32 alu_state, u32 alu_limit)
13368 {
13369 	/* If we arrived here from different branches with different
13370 	 * state or limits to sanitize, then this won't work.
13371 	 */
13372 	if (aux->alu_state &&
13373 	    (aux->alu_state != alu_state ||
13374 	     aux->alu_limit != alu_limit))
13375 		return REASON_PATHS;
13376 
13377 	/* Corresponding fixup done in do_misc_fixups(). */
13378 	aux->alu_state = alu_state;
13379 	aux->alu_limit = alu_limit;
13380 	return 0;
13381 }
13382 
13383 static int sanitize_val_alu(struct bpf_verifier_env *env,
13384 			    struct bpf_insn *insn)
13385 {
13386 	struct bpf_insn_aux_data *aux = cur_aux(env);
13387 
13388 	if (can_skip_alu_sanitation(env, insn))
13389 		return 0;
13390 
13391 	return update_alu_sanitation_state(aux, BPF_ALU_NON_POINTER, 0);
13392 }
13393 
13394 static bool sanitize_needed(u8 opcode)
13395 {
13396 	return opcode == BPF_ADD || opcode == BPF_SUB;
13397 }
13398 
13399 struct bpf_sanitize_info {
13400 	struct bpf_insn_aux_data aux;
13401 	bool mask_to_left;
13402 };
13403 
13404 static int sanitize_speculative_path(struct bpf_verifier_env *env,
13405 				     const struct bpf_insn *insn,
13406 				     u32 next_idx, u32 curr_idx)
13407 {
13408 	struct bpf_verifier_state *branch;
13409 	struct bpf_reg_state *regs;
13410 
13411 	branch = push_stack(env, next_idx, curr_idx, true);
13412 	if (!IS_ERR(branch) && insn) {
13413 		regs = branch->frame[branch->curframe]->regs;
13414 		if (BPF_SRC(insn->code) == BPF_K) {
13415 			mark_reg_unknown(env, regs, insn->dst_reg);
13416 		} else if (BPF_SRC(insn->code) == BPF_X) {
13417 			mark_reg_unknown(env, regs, insn->dst_reg);
13418 			mark_reg_unknown(env, regs, insn->src_reg);
13419 		}
13420 	}
13421 	return PTR_ERR_OR_ZERO(branch);
13422 }
13423 
13424 static int sanitize_ptr_alu(struct bpf_verifier_env *env,
13425 			    struct bpf_insn *insn,
13426 			    const struct bpf_reg_state *ptr_reg,
13427 			    const struct bpf_reg_state *off_reg,
13428 			    struct bpf_reg_state *dst_reg,
13429 			    struct bpf_sanitize_info *info,
13430 			    const bool commit_window)
13431 {
13432 	struct bpf_insn_aux_data *aux = commit_window ? cur_aux(env) : &info->aux;
13433 	struct bpf_verifier_state *vstate = env->cur_state;
13434 	bool off_is_imm = tnum_is_const(off_reg->var_off);
13435 	bool off_is_neg = reg_smin(off_reg) < 0;
13436 	bool ptr_is_dst_reg = ptr_reg == dst_reg;
13437 	u8 opcode = BPF_OP(insn->code);
13438 	u32 alu_state, alu_limit;
13439 	struct bpf_reg_state tmp;
13440 	int err;
13441 
13442 	if (can_skip_alu_sanitation(env, insn))
13443 		return 0;
13444 
13445 	/* We already marked aux for masking from non-speculative
13446 	 * paths, thus we got here in the first place. We only care
13447 	 * to explore bad access from here.
13448 	 */
13449 	if (vstate->speculative)
13450 		goto do_sim;
13451 
13452 	if (!commit_window) {
13453 		if (!tnum_is_const(off_reg->var_off) &&
13454 		    (reg_smin(off_reg) < 0) != (reg_smax(off_reg) < 0))
13455 			return REASON_BOUNDS;
13456 
13457 		info->mask_to_left = (opcode == BPF_ADD &&  off_is_neg) ||
13458 				     (opcode == BPF_SUB && !off_is_neg);
13459 	}
13460 
13461 	err = retrieve_ptr_limit(ptr_reg, &alu_limit, info->mask_to_left);
13462 	if (err < 0)
13463 		return err;
13464 
13465 	if (commit_window) {
13466 		/* In commit phase we narrow the masking window based on
13467 		 * the observed pointer move after the simulated operation.
13468 		 */
13469 		alu_state = info->aux.alu_state;
13470 		alu_limit = abs(info->aux.alu_limit - alu_limit);
13471 	} else {
13472 		alu_state  = off_is_neg ? BPF_ALU_NEG_VALUE : 0;
13473 		alu_state |= off_is_imm ? BPF_ALU_IMMEDIATE : 0;
13474 		alu_state |= ptr_is_dst_reg ?
13475 			     BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST;
13476 
13477 		/* Limit pruning on unknown scalars to enable deep search for
13478 		 * potential masking differences from other program paths.
13479 		 */
13480 		if (!off_is_imm)
13481 			env->explore_alu_limits = true;
13482 	}
13483 
13484 	err = update_alu_sanitation_state(aux, alu_state, alu_limit);
13485 	if (err < 0)
13486 		return err;
13487 do_sim:
13488 	/* If we're in commit phase, we're done here given we already
13489 	 * pushed the truncated dst_reg into the speculative verification
13490 	 * stack.
13491 	 *
13492 	 * Also, when register is a known constant, we rewrite register-based
13493 	 * operation to immediate-based, and thus do not need masking (and as
13494 	 * a consequence, do not need to simulate the zero-truncation either).
13495 	 */
13496 	if (commit_window || off_is_imm)
13497 		return 0;
13498 
13499 	/* Simulate and find potential out-of-bounds access under
13500 	 * speculative execution from truncation as a result of
13501 	 * masking when off was not within expected range. If off
13502 	 * sits in dst, then we temporarily need to move ptr there
13503 	 * to simulate dst (== 0) +/-= ptr. Needed, for example,
13504 	 * for cases where we use K-based arithmetic in one direction
13505 	 * and truncated reg-based in the other in order to explore
13506 	 * bad access.
13507 	 */
13508 	if (!ptr_is_dst_reg) {
13509 		tmp = *dst_reg;
13510 		*dst_reg = *ptr_reg;
13511 	}
13512 	err = sanitize_speculative_path(env, NULL, env->insn_idx + 1, env->insn_idx);
13513 	if (err < 0)
13514 		return REASON_STACK;
13515 	if (!ptr_is_dst_reg)
13516 		*dst_reg = tmp;
13517 	return 0;
13518 }
13519 
13520 static void sanitize_mark_insn_seen(struct bpf_verifier_env *env)
13521 {
13522 	struct bpf_verifier_state *vstate = env->cur_state;
13523 
13524 	/* If we simulate paths under speculation, we don't update the
13525 	 * insn as 'seen' such that when we verify unreachable paths in
13526 	 * the non-speculative domain, sanitize_dead_code() can still
13527 	 * rewrite/sanitize them.
13528 	 */
13529 	if (!vstate->speculative)
13530 		env->insn_aux_data[env->insn_idx].seen = env->pass_cnt;
13531 }
13532 
13533 static int sanitize_err(struct bpf_verifier_env *env,
13534 			const struct bpf_insn *insn, int reason,
13535 			const struct bpf_reg_state *off_reg,
13536 			const struct bpf_reg_state *dst_reg)
13537 {
13538 	static const char *err = "pointer arithmetic with it prohibited for !root";
13539 	const char *op = BPF_OP(insn->code) == BPF_ADD ? "add" : "sub";
13540 	u32 dst = insn->dst_reg, src = insn->src_reg;
13541 
13542 	switch (reason) {
13543 	case REASON_BOUNDS:
13544 		verbose(env, "R%d has unknown scalar with mixed signed bounds, %s\n",
13545 			off_reg == dst_reg ? dst : src, err);
13546 		break;
13547 	case REASON_TYPE:
13548 		verbose(env, "R%d has pointer with unsupported alu operation, %s\n",
13549 			off_reg == dst_reg ? src : dst, err);
13550 		break;
13551 	case REASON_PATHS:
13552 		verbose(env, "R%d tried to %s from different maps, paths or scalars, %s\n",
13553 			dst, op, err);
13554 		break;
13555 	case REASON_LIMIT:
13556 		verbose(env, "R%d tried to %s beyond pointer bounds, %s\n",
13557 			dst, op, err);
13558 		break;
13559 	case REASON_STACK:
13560 		verbose(env, "R%d could not be pushed for speculative verification, %s\n",
13561 			dst, err);
13562 		return -ENOMEM;
13563 	default:
13564 		verifier_bug(env, "unknown reason (%d)", reason);
13565 		break;
13566 	}
13567 
13568 	return -EACCES;
13569 }
13570 
13571 /* check that stack access falls within stack limits and that 'reg' doesn't
13572  * have a variable offset.
13573  *
13574  * Variable offset is prohibited for unprivileged mode for simplicity since it
13575  * requires corresponding support in Spectre masking for stack ALU.  See also
13576  * retrieve_ptr_limit().
13577  */
13578 static int check_stack_access_for_ptr_arithmetic(
13579 				struct bpf_verifier_env *env,
13580 				int regno,
13581 				const struct bpf_reg_state *reg,
13582 				int off)
13583 {
13584 	if (!tnum_is_const(reg->var_off)) {
13585 		char tn_buf[48];
13586 
13587 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
13588 		verbose(env, "R%d variable stack access prohibited for !root, var_off=%s off=%d\n",
13589 			regno, tn_buf, off);
13590 		return -EACCES;
13591 	}
13592 
13593 	if (off >= 0 || off < -MAX_BPF_STACK) {
13594 		verbose(env, "R%d stack pointer arithmetic goes out of range, "
13595 			"prohibited for !root; off=%d\n", regno, off);
13596 		return -EACCES;
13597 	}
13598 
13599 	return 0;
13600 }
13601 
13602 static int sanitize_check_bounds(struct bpf_verifier_env *env,
13603 				 const struct bpf_insn *insn,
13604 				 struct bpf_reg_state *dst_reg)
13605 {
13606 	u32 dst = insn->dst_reg;
13607 
13608 	/* For unprivileged we require that resulting offset must be in bounds
13609 	 * in order to be able to sanitize access later on.
13610 	 */
13611 	if (env->bypass_spec_v1)
13612 		return 0;
13613 
13614 	switch (dst_reg->type) {
13615 	case PTR_TO_STACK:
13616 		if (check_stack_access_for_ptr_arithmetic(env, dst, dst_reg,
13617 							  dst_reg->var_off.value))
13618 			return -EACCES;
13619 		break;
13620 	case PTR_TO_MAP_VALUE:
13621 		if (check_map_access(env, dst_reg, argno_from_reg(dst), 0, 1, false, ACCESS_HELPER)) {
13622 			verbose(env, "R%d pointer arithmetic of map value goes out of range, "
13623 				"prohibited for !root\n", dst);
13624 			return -EACCES;
13625 		}
13626 		break;
13627 	default:
13628 		return -EOPNOTSUPP;
13629 	}
13630 
13631 	return 0;
13632 }
13633 
13634 /* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.
13635  * Caller should also handle BPF_MOV case separately.
13636  * If we return -EACCES, caller may want to try again treating pointer as a
13637  * scalar.  So we only emit a diagnostic if !env->allow_ptr_leaks.
13638  */
13639 static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
13640 				   struct bpf_insn *insn,
13641 				   const struct bpf_reg_state *ptr_reg,
13642 				   const struct bpf_reg_state *off_reg)
13643 {
13644 	struct bpf_verifier_state *vstate = env->cur_state;
13645 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
13646 	struct bpf_reg_state *regs = state->regs, *dst_reg;
13647 	bool known = tnum_is_const(off_reg->var_off);
13648 	s64 smin_val = reg_smin(off_reg), smax_val = reg_smax(off_reg);
13649 	u64 umin_val = reg_umin(off_reg), umax_val = reg_umax(off_reg);
13650 	struct bpf_sanitize_info info = {};
13651 	u8 opcode = BPF_OP(insn->code);
13652 	u32 dst = insn->dst_reg;
13653 	int ret, bounds_ret;
13654 
13655 	dst_reg = &regs[dst];
13656 
13657 	if ((known && (smin_val != smax_val || umin_val != umax_val)) ||
13658 	    smin_val > smax_val || umin_val > umax_val) {
13659 		/* Taint dst register if offset had invalid bounds derived from
13660 		 * e.g. dead branches.
13661 		 */
13662 		__mark_reg_unknown(env, dst_reg);
13663 		return 0;
13664 	}
13665 
13666 	if (BPF_CLASS(insn->code) != BPF_ALU64) {
13667 		/* 32-bit ALU ops on pointers produce (meaningless) scalars */
13668 		if (opcode == BPF_SUB && env->allow_ptr_leaks) {
13669 			__mark_reg_unknown(env, dst_reg);
13670 			return 0;
13671 		}
13672 
13673 		verbose(env,
13674 			"R%d 32-bit pointer arithmetic prohibited\n",
13675 			dst);
13676 		return -EACCES;
13677 	}
13678 
13679 	if (ptr_reg->type & PTR_MAYBE_NULL) {
13680 		verbose(env, "R%d pointer arithmetic on %s prohibited, null-check it first\n",
13681 			dst, reg_type_str(env, ptr_reg->type));
13682 		return -EACCES;
13683 	}
13684 
13685 	/*
13686 	 * Accesses to untrusted PTR_TO_MEM are done through probe
13687 	 * instructions, hence no need to track offsets.
13688 	 */
13689 	if (base_type(ptr_reg->type) == PTR_TO_MEM && (ptr_reg->type & PTR_UNTRUSTED))
13690 		return 0;
13691 
13692 	switch (base_type(ptr_reg->type)) {
13693 	case PTR_TO_CTX:
13694 	case PTR_TO_MAP_VALUE:
13695 	case PTR_TO_MAP_KEY:
13696 	case PTR_TO_STACK:
13697 	case PTR_TO_PACKET_META:
13698 	case PTR_TO_PACKET:
13699 	case PTR_TO_TP_BUFFER:
13700 	case PTR_TO_BTF_ID:
13701 	case PTR_TO_MEM:
13702 	case PTR_TO_BUF:
13703 	case PTR_TO_FUNC:
13704 	case CONST_PTR_TO_DYNPTR:
13705 		break;
13706 	case PTR_TO_FLOW_KEYS:
13707 		if (known)
13708 			break;
13709 		fallthrough;
13710 	case CONST_PTR_TO_MAP:
13711 		/* smin_val represents the known value */
13712 		if (known && smin_val == 0 && opcode == BPF_ADD)
13713 			break;
13714 		fallthrough;
13715 	default:
13716 		verbose(env, "R%d pointer arithmetic on %s prohibited\n",
13717 			dst, reg_type_str(env, ptr_reg->type));
13718 		return -EACCES;
13719 	}
13720 
13721 	/* In case of 'scalar += pointer', dst_reg inherits pointer type and id.
13722 	 * The id may be overwritten later if we create a new variable offset.
13723 	 */
13724 	dst_reg->type = ptr_reg->type;
13725 	dst_reg->id = ptr_reg->id;
13726 
13727 	if (!check_reg_sane_offset_scalar(env, off_reg, ptr_reg->type) ||
13728 	    !check_reg_sane_offset_ptr(env, ptr_reg, ptr_reg->type))
13729 		return -EINVAL;
13730 
13731 	/* pointer types do not carry 32-bit bounds at the moment. */
13732 	__mark_reg32_unbounded(dst_reg);
13733 
13734 	if (sanitize_needed(opcode)) {
13735 		ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg,
13736 				       &info, false);
13737 		if (ret < 0)
13738 			return sanitize_err(env, insn, ret, off_reg, dst_reg);
13739 	}
13740 
13741 	switch (opcode) {
13742 	case BPF_ADD:
13743 		/*
13744 		 * dst_reg gets the pointer type and since some positive
13745 		 * integer value was added to the pointer, give it a new 'id'
13746 		 * if it's a PTR_TO_PACKET.
13747 		 * this creates a new 'base' pointer, off_reg (variable) gets
13748 		 * added into the variable offset, and we copy the fixed offset
13749 		 * from ptr_reg.
13750 		 */
13751 		dst_reg->r64 = cnum64_add(ptr_reg->r64, off_reg->r64);
13752 		dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off);
13753 		dst_reg->raw = ptr_reg->raw;
13754 		if (reg_is_pkt_pointer(ptr_reg)) {
13755 			if (!known)
13756 				dst_reg->id = ++env->id_gen;
13757 			/*
13758 			 * Clear range for unknown addends since we can't know
13759 			 * where the pkt pointer ended up. Also clear AT_PKT_END /
13760 			 * BEYOND_PKT_END from prior comparison as any pointer
13761 			 * arithmetic invalidates them.
13762 			 */
13763 			if (!known || dst_reg->range < 0)
13764 				memset(&dst_reg->raw, 0, sizeof(dst_reg->raw));
13765 		}
13766 		break;
13767 	case BPF_SUB:
13768 		if (dst_reg == off_reg) {
13769 			/* scalar -= pointer.  Creates an unknown scalar */
13770 			verbose(env, "R%d tried to subtract pointer from scalar\n",
13771 				dst);
13772 			return -EACCES;
13773 		}
13774 		/* We don't allow subtraction from FP, because (according to
13775 		 * test_verifier.c test "invalid fp arithmetic", JITs might not
13776 		 * be able to deal with it.
13777 		 */
13778 		if (ptr_reg->type == PTR_TO_STACK) {
13779 			verbose(env, "R%d subtraction from stack pointer prohibited\n",
13780 				dst);
13781 			return -EACCES;
13782 		}
13783 		dst_reg->r64 = cnum64_add(ptr_reg->r64, cnum64_negate(off_reg->r64));
13784 		dst_reg->var_off = tnum_sub(ptr_reg->var_off, off_reg->var_off);
13785 		dst_reg->raw = ptr_reg->raw;
13786 		if (reg_is_pkt_pointer(ptr_reg)) {
13787 			if (!known)
13788 				dst_reg->id = ++env->id_gen;
13789 			/*
13790 			 * Clear range if the subtrahend may be negative since
13791 			 * pkt pointer could move past its bounds. A positive
13792 			 * subtrahend moves it backwards keeping positive range
13793 			 * intact. Also clear AT_PKT_END / BEYOND_PKT_END from
13794 			 * prior comparison as arithmetic invalidates them.
13795 			 */
13796 			if ((!known && smin_val < 0) || dst_reg->range < 0)
13797 				memset(&dst_reg->raw, 0, sizeof(dst_reg->raw));
13798 		}
13799 		break;
13800 	case BPF_AND:
13801 	case BPF_OR:
13802 	case BPF_XOR:
13803 		/* bitwise ops on pointers are troublesome, prohibit. */
13804 		verbose(env, "R%d bitwise operator %s on pointer prohibited\n",
13805 			dst, bpf_alu_string[opcode >> 4]);
13806 		return -EACCES;
13807 	default:
13808 		/* other operators (e.g. MUL,LSH) produce non-pointer results */
13809 		verbose(env, "R%d pointer arithmetic with %s operator prohibited\n",
13810 			dst, bpf_alu_string[opcode >> 4]);
13811 		return -EACCES;
13812 	}
13813 
13814 	if (!check_reg_sane_offset_ptr(env, dst_reg, ptr_reg->type))
13815 		return -EINVAL;
13816 	reg_bounds_sync(dst_reg);
13817 	bounds_ret = sanitize_check_bounds(env, insn, dst_reg);
13818 	if (bounds_ret == -EACCES)
13819 		return bounds_ret;
13820 	if (sanitize_needed(opcode)) {
13821 		ret = sanitize_ptr_alu(env, insn, dst_reg, off_reg, dst_reg,
13822 				       &info, true);
13823 		if (verifier_bug_if(!can_skip_alu_sanitation(env, insn)
13824 				    && !env->cur_state->speculative
13825 				    && bounds_ret
13826 				    && !ret,
13827 				    env, "Pointer type unsupported by sanitize_check_bounds() not rejected by retrieve_ptr_limit() as required")) {
13828 			return -EFAULT;
13829 		}
13830 		if (ret < 0)
13831 			return sanitize_err(env, insn, ret, off_reg, dst_reg);
13832 	}
13833 
13834 	return 0;
13835 }
13836 
13837 static void scalar32_min_max_add(struct bpf_reg_state *dst_reg,
13838 				 struct bpf_reg_state *src_reg)
13839 {
13840 	dst_reg->r32 = cnum32_add(dst_reg->r32, src_reg->r32);
13841 }
13842 
13843 static void scalar_min_max_add(struct bpf_reg_state *dst_reg,
13844 			       struct bpf_reg_state *src_reg)
13845 {
13846 	dst_reg->r64 = cnum64_add(dst_reg->r64, src_reg->r64);
13847 }
13848 
13849 static void scalar32_min_max_sub(struct bpf_reg_state *dst_reg,
13850 				 struct bpf_reg_state *src_reg)
13851 {
13852 	dst_reg->r32 = cnum32_add(dst_reg->r32, cnum32_negate(src_reg->r32));
13853 }
13854 
13855 static void scalar_min_max_sub(struct bpf_reg_state *dst_reg,
13856 			       struct bpf_reg_state *src_reg)
13857 {
13858 	dst_reg->r64 = cnum64_add(dst_reg->r64, cnum64_negate(src_reg->r64));
13859 }
13860 
13861 static void scalar32_min_max_mul(struct bpf_reg_state *dst_reg,
13862 				 struct bpf_reg_state *src_reg)
13863 {
13864 	s32 smin = reg_s32_min(dst_reg);
13865 	s32 smax = reg_s32_max(dst_reg);
13866 	u32 umin = reg_u32_min(dst_reg);
13867 	u32 umax = reg_u32_max(dst_reg);
13868 	s32 tmp_prod[4];
13869 
13870 	if (check_mul_overflow(umax, reg_u32_max(src_reg), &umax) ||
13871 	    check_mul_overflow(umin, reg_u32_min(src_reg), &umin)) {
13872 		/* Overflow possible, we know nothing */
13873 		umin = 0;
13874 		umax = U32_MAX;
13875 	}
13876 	if (check_mul_overflow(smin, reg_s32_min(src_reg), &tmp_prod[0]) ||
13877 	    check_mul_overflow(smin, reg_s32_max(src_reg), &tmp_prod[1]) ||
13878 	    check_mul_overflow(smax, reg_s32_min(src_reg), &tmp_prod[2]) ||
13879 	    check_mul_overflow(smax, reg_s32_max(src_reg), &tmp_prod[3])) {
13880 		/* Overflow possible, we know nothing */
13881 		smin = S32_MIN;
13882 		smax = S32_MAX;
13883 	} else {
13884 		smin = min_array(tmp_prod, 4);
13885 		smax = max_array(tmp_prod, 4);
13886 	}
13887 
13888 	dst_reg->r32 = cnum32_intersect(cnum32_from_urange(umin, umax),
13889 					cnum32_from_srange(smin, smax));
13890 }
13891 
13892 static void scalar_min_max_mul(struct bpf_reg_state *dst_reg,
13893 			       struct bpf_reg_state *src_reg)
13894 {
13895 	s64 smin = reg_smin(dst_reg);
13896 	s64 smax = reg_smax(dst_reg);
13897 	u64 umin = reg_umin(dst_reg);
13898 	u64 umax = reg_umax(dst_reg);
13899 	s64 tmp_prod[4];
13900 
13901 	if (check_mul_overflow(umax, reg_umax(src_reg), &umax) ||
13902 	    check_mul_overflow(umin, reg_umin(src_reg), &umin)) {
13903 		/* Overflow possible, we know nothing */
13904 		umin = 0;
13905 		umax = U64_MAX;
13906 	}
13907 	if (check_mul_overflow(smin, reg_smin(src_reg), &tmp_prod[0]) ||
13908 	    check_mul_overflow(smin, reg_smax(src_reg), &tmp_prod[1]) ||
13909 	    check_mul_overflow(smax, reg_smin(src_reg), &tmp_prod[2]) ||
13910 	    check_mul_overflow(smax, reg_smax(src_reg), &tmp_prod[3])) {
13911 		/* Overflow possible, we know nothing */
13912 		smin = S64_MIN;
13913 		smax = S64_MAX;
13914 	} else {
13915 		smin = min_array(tmp_prod, 4);
13916 		smax = max_array(tmp_prod, 4);
13917 	}
13918 
13919 	dst_reg->r64 = cnum64_intersect(cnum64_from_urange(umin, umax),
13920 					cnum64_from_srange(smin, smax));
13921 }
13922 
13923 static void scalar32_min_max_udiv(struct bpf_reg_state *dst_reg,
13924 				  struct bpf_reg_state *src_reg)
13925 {
13926 	u32 src_val = reg_u32_min(src_reg); /* non-zero, const divisor */
13927 
13928 	reg_set_urange32(dst_reg, reg_u32_min(dst_reg) / src_val,
13929 			 reg_u32_max(dst_reg) / src_val);
13930 
13931 	/* Reset other ranges/tnum to unbounded/unknown. */
13932 	reset_reg64_and_tnum(dst_reg);
13933 }
13934 
13935 static void scalar_min_max_udiv(struct bpf_reg_state *dst_reg,
13936 				struct bpf_reg_state *src_reg)
13937 {
13938 	u64 src_val = reg_umin(src_reg); /* non-zero, const divisor */
13939 
13940 	reg_set_urange64(dst_reg, div64_u64(reg_umin(dst_reg), src_val),
13941 			 div64_u64(reg_umax(dst_reg), src_val));
13942 
13943 	/* Reset other ranges/tnum to unbounded/unknown. */
13944 	reset_reg32_and_tnum(dst_reg);
13945 }
13946 
13947 static void scalar32_min_max_sdiv(struct bpf_reg_state *dst_reg,
13948 				  struct bpf_reg_state *src_reg)
13949 {
13950 	s32 smin = reg_s32_min(dst_reg);
13951 	s32 smax = reg_s32_max(dst_reg);
13952 	s32 src_val = reg_s32_min(src_reg); /* non-zero, const divisor */
13953 	s32 res1, res2;
13954 
13955 	/* BPF div specification: S32_MIN / -1 = S32_MIN */
13956 	if (smin == S32_MIN && src_val == -1) {
13957 		/*
13958 		 * If the dividend range contains more than just S32_MIN,
13959 		 * we cannot precisely track the result, so it becomes unbounded.
13960 		 * e.g., [S32_MIN, S32_MIN+10]/(-1),
13961 		 *     = {S32_MIN} U [-(S32_MIN+10), -(S32_MIN+1)]
13962 		 *     = {S32_MIN} U [S32_MAX-9, S32_MAX] = [S32_MIN, S32_MAX]
13963 		 * Otherwise (if dividend is exactly S32_MIN), result remains S32_MIN.
13964 		 */
13965 		if (smax != S32_MIN) {
13966 			smin = S32_MIN;
13967 			smax = S32_MAX;
13968 		}
13969 		goto reset;
13970 	}
13971 
13972 	res1 = smin / src_val;
13973 	res2 = smax / src_val;
13974 	smin = min(res1, res2);
13975 	smax = max(res1, res2);
13976 
13977 reset:
13978 	reg_set_srange32(dst_reg, smin, smax);
13979 	/* Reset other ranges/tnum to unbounded/unknown. */
13980 	reset_reg64_and_tnum(dst_reg);
13981 }
13982 
13983 static void scalar_min_max_sdiv(struct bpf_reg_state *dst_reg,
13984 				struct bpf_reg_state *src_reg)
13985 {
13986 	s64 smin = reg_smin(dst_reg);
13987 	s64 smax = reg_smax(dst_reg);
13988 	s64 src_val = reg_smin(src_reg); /* non-zero, const divisor */
13989 	s64 res1, res2;
13990 
13991 	/* BPF div specification: S64_MIN / -1 = S64_MIN */
13992 	if (smin == S64_MIN && src_val == -1) {
13993 		/*
13994 		 * If the dividend range contains more than just S64_MIN,
13995 		 * we cannot precisely track the result, so it becomes unbounded.
13996 		 * e.g., [S64_MIN, S64_MIN+10]/(-1),
13997 		 *     = {S64_MIN} U [-(S64_MIN+10), -(S64_MIN+1)]
13998 		 *     = {S64_MIN} U [S64_MAX-9, S64_MAX] = [S64_MIN, S64_MAX]
13999 		 * Otherwise (if dividend is exactly S64_MIN), result remains S64_MIN.
14000 		 */
14001 		if (smax != S64_MIN) {
14002 			smin = S64_MIN;
14003 			smax = S64_MAX;
14004 		}
14005 		goto reset;
14006 	}
14007 
14008 	res1 = div64_s64(smin, src_val);
14009 	res2 = div64_s64(smax, src_val);
14010 	smin = min(res1, res2);
14011 	smax = max(res1, res2);
14012 
14013 reset:
14014 	reg_set_srange64(dst_reg, smin, smax);
14015 	/* Reset other ranges/tnum to unbounded/unknown. */
14016 	reset_reg32_and_tnum(dst_reg);
14017 }
14018 
14019 static void scalar32_min_max_umod(struct bpf_reg_state *dst_reg,
14020 				  struct bpf_reg_state *src_reg)
14021 {
14022 	u32 src_val = reg_u32_min(src_reg); /* non-zero, const divisor */
14023 	u32 res_max = src_val - 1;
14024 
14025 	/*
14026 	 * If dst_umax <= res_max, the result remains unchanged.
14027 	 * e.g., [2, 5] % 10 = [2, 5].
14028 	 */
14029 	if (reg_u32_max(dst_reg) <= res_max)
14030 		return;
14031 
14032 	reg_set_urange32(dst_reg, 0, min(reg_u32_max(dst_reg), res_max));
14033 
14034 	/* Reset other ranges/tnum to unbounded/unknown. */
14035 	reset_reg64_and_tnum(dst_reg);
14036 }
14037 
14038 static void scalar_min_max_umod(struct bpf_reg_state *dst_reg,
14039 				struct bpf_reg_state *src_reg)
14040 {
14041 	u64 src_val = reg_umin(src_reg); /* non-zero, const divisor */
14042 	u64 res_max = src_val - 1;
14043 
14044 	/*
14045 	 * If dst_umax <= res_max, the result remains unchanged.
14046 	 * e.g., [2, 5] % 10 = [2, 5].
14047 	 */
14048 	if (reg_umax(dst_reg) <= res_max)
14049 		return;
14050 
14051 	reg_set_urange64(dst_reg, 0, min(reg_umax(dst_reg), res_max));
14052 
14053 	/* Reset other ranges/tnum to unbounded/unknown. */
14054 	reset_reg32_and_tnum(dst_reg);
14055 }
14056 
14057 static void scalar32_min_max_smod(struct bpf_reg_state *dst_reg,
14058 				  struct bpf_reg_state *src_reg)
14059 {
14060 	s32 src_val = reg_s32_min(src_reg); /* non-zero, const divisor */
14061 
14062 	/*
14063 	 * Safe absolute value calculation:
14064 	 * If src_val == S32_MIN (-2147483648), src_abs becomes 2147483648.
14065 	 * Here use unsigned integer to avoid overflow.
14066 	 */
14067 	u32 src_abs = (src_val > 0) ? (u32)src_val : -(u32)src_val;
14068 
14069 	/*
14070 	 * Calculate the maximum possible absolute value of the result.
14071 	 * Even if src_abs is 2147483648 (S32_MIN), subtracting 1 gives
14072 	 * 2147483647 (S32_MAX), which fits perfectly in s32.
14073 	 */
14074 	s32 res_max_abs = src_abs - 1;
14075 
14076 	/*
14077 	 * If the dividend is already within the result range,
14078 	 * the result remains unchanged. e.g., [-2, 5] % 10 = [-2, 5].
14079 	 */
14080 	if (reg_s32_min(dst_reg) >= -res_max_abs && reg_s32_max(dst_reg) <= res_max_abs)
14081 		return;
14082 
14083 	/* General case: result has the same sign as the dividend. */
14084 	if (reg_s32_min(dst_reg) >= 0) {
14085 		reg_set_srange32(dst_reg, 0, min(reg_s32_max(dst_reg), res_max_abs));
14086 	} else if (reg_s32_max(dst_reg) <= 0) {
14087 		reg_set_srange32(dst_reg, max(reg_s32_min(dst_reg), -res_max_abs), 0);
14088 	} else {
14089 		reg_set_srange32(dst_reg, -res_max_abs, res_max_abs);
14090 	}
14091 
14092 	/* Reset other ranges/tnum to unbounded/unknown. */
14093 	reset_reg64_and_tnum(dst_reg);
14094 }
14095 
14096 static void scalar_min_max_smod(struct bpf_reg_state *dst_reg,
14097 				struct bpf_reg_state *src_reg)
14098 {
14099 	s64 src_val = reg_smin(src_reg); /* non-zero, const divisor */
14100 
14101 	/*
14102 	 * Safe absolute value calculation:
14103 	 * If src_val == S64_MIN (-2^63), src_abs becomes 2^63.
14104 	 * Here use unsigned integer to avoid overflow.
14105 	 */
14106 	u64 src_abs = (src_val > 0) ? (u64)src_val : -(u64)src_val;
14107 
14108 	/*
14109 	 * Calculate the maximum possible absolute value of the result.
14110 	 * Even if src_abs is 2^63 (S64_MIN), subtracting 1 gives
14111 	 * 2^63 - 1 (S64_MAX), which fits perfectly in s64.
14112 	 */
14113 	s64 res_max_abs = src_abs - 1;
14114 
14115 	/*
14116 	 * If the dividend is already within the result range,
14117 	 * the result remains unchanged. e.g., [-2, 5] % 10 = [-2, 5].
14118 	 */
14119 	if (reg_smin(dst_reg) >= -res_max_abs && reg_smax(dst_reg) <= res_max_abs)
14120 		return;
14121 
14122 	/* General case: result has the same sign as the dividend. */
14123 	if (reg_smin(dst_reg) >= 0) {
14124 		reg_set_srange64(dst_reg, 0, min(reg_smax(dst_reg), res_max_abs));
14125 	} else if (reg_smax(dst_reg) <= 0) {
14126 		reg_set_srange64(dst_reg, max(reg_smin(dst_reg), -res_max_abs), 0);
14127 	} else {
14128 		reg_set_srange64(dst_reg, -res_max_abs, res_max_abs);
14129 	}
14130 
14131 	/* Reset other ranges/tnum to unbounded/unknown. */
14132 	reset_reg32_and_tnum(dst_reg);
14133 }
14134 
14135 static void scalar32_min_max_and(struct bpf_reg_state *dst_reg,
14136 				 struct bpf_reg_state *src_reg)
14137 {
14138 	bool src_known = tnum_subreg_is_const(src_reg->var_off);
14139 	bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
14140 	struct tnum var32_off = tnum_subreg(dst_reg->var_off);
14141 	u32 umax_val = reg_u32_max(src_reg);
14142 
14143 	if (src_known && dst_known) {
14144 		__mark_reg32_known(dst_reg, var32_off.value);
14145 		return;
14146 	}
14147 
14148 	/* We get our minimum from the var_off, since that's inherently
14149 	 * bitwise.  Our maximum is the minimum of the operands' maxima.
14150 	 */
14151 	reg_set_urange32(dst_reg,
14152 			 var32_off.value,
14153 			 min(reg_u32_max(dst_reg), umax_val));
14154 }
14155 
14156 static void scalar_min_max_and(struct bpf_reg_state *dst_reg,
14157 			       struct bpf_reg_state *src_reg)
14158 {
14159 	bool src_known = tnum_is_const(src_reg->var_off);
14160 	bool dst_known = tnum_is_const(dst_reg->var_off);
14161 	u64 umax_val = reg_umax(src_reg);
14162 
14163 	if (src_known && dst_known) {
14164 		__mark_reg_known(dst_reg, dst_reg->var_off.value);
14165 		return;
14166 	}
14167 
14168 	/* We get our minimum from the var_off, since that's inherently
14169 	 * bitwise.  Our maximum is the minimum of the operands' maxima.
14170 	 */
14171 	reg_set_urange64(dst_reg,
14172 			 dst_reg->var_off.value,
14173 			 min(reg_umax(dst_reg), umax_val));
14174 
14175 	/* We may learn something more from the var_off */
14176 	__update_reg_bounds(dst_reg);
14177 }
14178 
14179 static void scalar32_min_max_or(struct bpf_reg_state *dst_reg,
14180 				struct bpf_reg_state *src_reg)
14181 {
14182 	bool src_known = tnum_subreg_is_const(src_reg->var_off);
14183 	bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
14184 	struct tnum var32_off = tnum_subreg(dst_reg->var_off);
14185 	u32 umin_val = reg_u32_min(src_reg);
14186 
14187 	if (src_known && dst_known) {
14188 		__mark_reg32_known(dst_reg, var32_off.value);
14189 		return;
14190 	}
14191 
14192 	/* We get our maximum from the var_off, and our minimum is the
14193 	 * maximum of the operands' minima
14194 	 */
14195 	reg_set_urange32(dst_reg,
14196 			 max(reg_u32_min(dst_reg), umin_val),
14197 			 var32_off.value | var32_off.mask);
14198 }
14199 
14200 static void scalar_min_max_or(struct bpf_reg_state *dst_reg,
14201 			      struct bpf_reg_state *src_reg)
14202 {
14203 	bool src_known = tnum_is_const(src_reg->var_off);
14204 	bool dst_known = tnum_is_const(dst_reg->var_off);
14205 	u64 umin_val = reg_umin(src_reg);
14206 
14207 	if (src_known && dst_known) {
14208 		__mark_reg_known(dst_reg, dst_reg->var_off.value);
14209 		return;
14210 	}
14211 
14212 	/* We get our maximum from the var_off, and our minimum is the
14213 	 * maximum of the operands' minima
14214 	 */
14215 	reg_set_urange64(dst_reg,
14216 			 max(reg_umin(dst_reg), umin_val),
14217 			 dst_reg->var_off.value | dst_reg->var_off.mask);
14218 
14219 	/* We may learn something more from the var_off */
14220 	__update_reg_bounds(dst_reg);
14221 }
14222 
14223 static void scalar32_min_max_xor(struct bpf_reg_state *dst_reg,
14224 				 struct bpf_reg_state *src_reg)
14225 {
14226 	bool src_known = tnum_subreg_is_const(src_reg->var_off);
14227 	bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
14228 	struct tnum var32_off = tnum_subreg(dst_reg->var_off);
14229 
14230 	if (src_known && dst_known) {
14231 		__mark_reg32_known(dst_reg, var32_off.value);
14232 		return;
14233 	}
14234 
14235 	/* We get both minimum and maximum from the var32_off. */
14236 	reg_set_urange32(dst_reg, var32_off.value, var32_off.value | var32_off.mask);
14237 }
14238 
14239 static void scalar_min_max_xor(struct bpf_reg_state *dst_reg,
14240 			       struct bpf_reg_state *src_reg)
14241 {
14242 	bool src_known = tnum_is_const(src_reg->var_off);
14243 	bool dst_known = tnum_is_const(dst_reg->var_off);
14244 
14245 	if (src_known && dst_known) {
14246 		/* dst_reg->var_off.value has been updated earlier */
14247 		__mark_reg_known(dst_reg, dst_reg->var_off.value);
14248 		return;
14249 	}
14250 
14251 	/* We get both minimum and maximum from the var_off. */
14252 	reg_set_urange64(dst_reg,
14253 			 dst_reg->var_off.value,
14254 			 dst_reg->var_off.value | dst_reg->var_off.mask);
14255 }
14256 
14257 static void __scalar32_min_max_lsh(struct bpf_reg_state *dst_reg,
14258 				   u64 umin_val, u64 umax_val)
14259 {
14260 	/* If we might shift our top bit out, then we know nothing */
14261 	if (umax_val > 31 || reg_u32_max(dst_reg) > 1ULL << (31 - umax_val))
14262 		reg_set_urange32(dst_reg, 0, U32_MAX);
14263 	else
14264 		/* We lose all sign bit information (except what we can pick
14265 		 * up from var_off)
14266 		 */
14267 		reg_set_urange32(dst_reg, reg_u32_min(dst_reg) << umin_val,
14268 				 reg_u32_max(dst_reg) << umax_val);
14269 }
14270 
14271 static void scalar32_min_max_lsh(struct bpf_reg_state *dst_reg,
14272 				 struct bpf_reg_state *src_reg)
14273 {
14274 	u32 umax_val = reg_u32_max(src_reg);
14275 	u32 umin_val = reg_u32_min(src_reg);
14276 	/* u32 alu operation will zext upper bits */
14277 	struct tnum subreg = tnum_subreg(dst_reg->var_off);
14278 
14279 	__scalar32_min_max_lsh(dst_reg, umin_val, umax_val);
14280 	dst_reg->var_off = tnum_subreg(tnum_lshift(subreg, umin_val));
14281 	/* Not required but being careful mark reg64 bounds as unknown so
14282 	 * that we are forced to pick them up from tnum and zext later and
14283 	 * if some path skips this step we are still safe.
14284 	 */
14285 	__mark_reg64_unbounded(dst_reg);
14286 	__update_reg32_bounds(dst_reg);
14287 }
14288 
14289 static void __scalar64_min_max_lsh(struct bpf_reg_state *dst_reg,
14290 				   u64 umin_val, u64 umax_val)
14291 {
14292 	struct cnum64 u, s;
14293 
14294 	/* Special case <<32 because it is a common compiler pattern to sign
14295 	 * extend subreg by doing <<32 s>>32. smin/smax assignments are correct
14296 	 * because s32 bounds don't flip sign when shifting to the left by
14297 	 * 32bits.
14298 	 */
14299 	if (umin_val == 32 && umax_val == 32)
14300 		s = cnum64_from_srange((s64)reg_s32_min(dst_reg) << 32,
14301 				       (s64)reg_s32_max(dst_reg) << 32);
14302 	else
14303 		s = CNUM64_UNBOUNDED;
14304 
14305 	/* If we might shift our top bit out, then we know nothing */
14306 	if (reg_umax(dst_reg) > 1ULL << (63 - umax_val))
14307 		u = CNUM64_UNBOUNDED;
14308 	else
14309 		u = cnum64_from_urange(reg_umin(dst_reg) << umin_val,
14310 				       reg_umax(dst_reg) << umax_val);
14311 
14312 	dst_reg->r64 = cnum64_intersect(u, s);
14313 }
14314 
14315 static void scalar_min_max_lsh(struct bpf_reg_state *dst_reg,
14316 			       struct bpf_reg_state *src_reg)
14317 {
14318 	u64 umax_val = reg_umax(src_reg);
14319 	u64 umin_val = reg_umin(src_reg);
14320 
14321 	/* scalar64 calc uses 32bit unshifted bounds so must be called first */
14322 	__scalar64_min_max_lsh(dst_reg, umin_val, umax_val);
14323 	__scalar32_min_max_lsh(dst_reg, umin_val, umax_val);
14324 
14325 	dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val);
14326 	/* We may learn something more from the var_off */
14327 	__update_reg_bounds(dst_reg);
14328 }
14329 
14330 static void scalar32_min_max_rsh(struct bpf_reg_state *dst_reg,
14331 				 struct bpf_reg_state *src_reg)
14332 {
14333 	struct tnum subreg = tnum_subreg(dst_reg->var_off);
14334 	u32 umax_val = reg_u32_max(src_reg);
14335 	u32 umin_val = reg_u32_min(src_reg);
14336 
14337 	/* BPF_RSH is an unsigned shift.  If the value in dst_reg might
14338 	 * be negative, then either:
14339 	 * 1) src_reg might be zero, so the sign bit of the result is
14340 	 *    unknown, so we lose our signed bounds
14341 	 * 2) it's known negative, thus the unsigned bounds capture the
14342 	 *    signed bounds
14343 	 * 3) the signed bounds cross zero, so they tell us nothing
14344 	 *    about the result
14345 	 * If the value in dst_reg is known nonnegative, then again the
14346 	 * unsigned bounds capture the signed bounds.
14347 	 * Thus, in all cases it suffices to blow away our signed bounds
14348 	 * and rely on inferring new ones from the unsigned bounds and
14349 	 * var_off of the result.
14350 	 */
14351 
14352 	dst_reg->var_off = tnum_rshift(subreg, umin_val);
14353 	reg_set_urange32(dst_reg, reg_u32_min(dst_reg) >> umax_val,
14354 			 reg_u32_max(dst_reg) >> umin_val);
14355 
14356 	__mark_reg64_unbounded(dst_reg);
14357 	__update_reg32_bounds(dst_reg);
14358 }
14359 
14360 static void scalar_min_max_rsh(struct bpf_reg_state *dst_reg,
14361 			       struct bpf_reg_state *src_reg)
14362 {
14363 	u64 umax_val = reg_umax(src_reg);
14364 	u64 umin_val = reg_umin(src_reg);
14365 
14366 	/* BPF_RSH is an unsigned shift.  If the value in dst_reg might
14367 	 * be negative, then either:
14368 	 * 1) src_reg might be zero, so the sign bit of the result is
14369 	 *    unknown, so we lose our signed bounds
14370 	 * 2) it's known negative, thus the unsigned bounds capture the
14371 	 *    signed bounds
14372 	 * 3) the signed bounds cross zero, so they tell us nothing
14373 	 *    about the result
14374 	 * If the value in dst_reg is known nonnegative, then again the
14375 	 * unsigned bounds capture the signed bounds.
14376 	 * Thus, in all cases it suffices to blow away our signed bounds
14377 	 * and rely on inferring new ones from the unsigned bounds and
14378 	 * var_off of the result.
14379 	 */
14380 	dst_reg->var_off = tnum_rshift(dst_reg->var_off, umin_val);
14381 	reg_set_urange64(dst_reg, reg_umin(dst_reg) >> umax_val,
14382 			 reg_umax(dst_reg) >> umin_val);
14383 
14384 	/* Its not easy to operate on alu32 bounds here because it depends
14385 	 * on bits being shifted in. Take easy way out and mark unbounded
14386 	 * so we can recalculate later from tnum.
14387 	 */
14388 	__mark_reg32_unbounded(dst_reg);
14389 	__update_reg_bounds(dst_reg);
14390 }
14391 
14392 static void scalar32_min_max_arsh(struct bpf_reg_state *dst_reg,
14393 				  struct bpf_reg_state *src_reg)
14394 {
14395 	u64 umin_val = reg_u32_min(src_reg);
14396 
14397 	/* Upon reaching here, src_known is true and
14398 	 * umax_val is equal to umin_val.
14399 	 * Blow away the dst_reg umin_value/umax_value and rely on
14400 	 * dst_reg var_off to refine the result.
14401 	 */
14402 	reg_set_srange32(dst_reg,
14403 			 (u32)(((s32)reg_s32_min(dst_reg)) >> umin_val),
14404 			 (u32)(((s32)reg_s32_max(dst_reg)) >> umin_val));
14405 
14406 	dst_reg->var_off = tnum_arshift(tnum_subreg(dst_reg->var_off), umin_val, 32);
14407 
14408 	__mark_reg64_unbounded(dst_reg);
14409 	__update_reg32_bounds(dst_reg);
14410 }
14411 
14412 static void scalar_min_max_arsh(struct bpf_reg_state *dst_reg,
14413 				struct bpf_reg_state *src_reg)
14414 {
14415 	u64 umin_val = reg_umin(src_reg);
14416 
14417 	/* Upon reaching here, src_known is true and umax_val is equal
14418 	 * to umin_val.
14419 	 */
14420 	reg_set_srange64(dst_reg, reg_smin(dst_reg) >> umin_val,
14421 			 reg_smax(dst_reg) >> umin_val);
14422 
14423 	dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_val, 64);
14424 
14425 	/* Its not easy to operate on alu32 bounds here because it depends
14426 	 * on bits being shifted in from upper 32-bits. Take easy way out
14427 	 * and mark unbounded so we can recalculate later from tnum.
14428 	 */
14429 	__mark_reg32_unbounded(dst_reg);
14430 	__update_reg_bounds(dst_reg);
14431 }
14432 
14433 static void scalar_byte_swap(struct bpf_reg_state *dst_reg, struct bpf_insn *insn)
14434 {
14435 	/*
14436 	 * Byte swap operation - update var_off using tnum_bswap.
14437 	 * Three cases:
14438 	 * 1. bswap(16|32|64): opcode=0xd7 (BPF_END | BPF_ALU64 | BPF_TO_LE)
14439 	 *    unconditional swap
14440 	 * 2. to_le(16|32|64): opcode=0xd4 (BPF_END | BPF_ALU | BPF_TO_LE)
14441 	 *    swap on big-endian, truncation or no-op on little-endian
14442 	 * 3. to_be(16|32|64): opcode=0xdc (BPF_END | BPF_ALU | BPF_TO_BE)
14443 	 *    swap on little-endian, truncation or no-op on big-endian
14444 	 */
14445 
14446 	bool alu64 = BPF_CLASS(insn->code) == BPF_ALU64;
14447 	bool to_le = BPF_SRC(insn->code) == BPF_TO_LE;
14448 	bool is_big_endian;
14449 #ifdef CONFIG_CPU_BIG_ENDIAN
14450 	is_big_endian = true;
14451 #else
14452 	is_big_endian = false;
14453 #endif
14454 	/* Apply bswap if alu64 or switch between big-endian and little-endian machines */
14455 	bool need_bswap = alu64 || (to_le == is_big_endian);
14456 
14457 	/*
14458 	 * If the register is mutated, manually reset its scalar ID to break
14459 	 * any existing ties and avoid incorrect bounds propagation.
14460 	 */
14461 	if (need_bswap || insn->imm == 16 || insn->imm == 32)
14462 		clear_scalar_id(dst_reg);
14463 
14464 	if (need_bswap) {
14465 		if (insn->imm == 16)
14466 			dst_reg->var_off = tnum_bswap16(dst_reg->var_off);
14467 		else if (insn->imm == 32)
14468 			dst_reg->var_off = tnum_bswap32(dst_reg->var_off);
14469 		else if (insn->imm == 64)
14470 			dst_reg->var_off = tnum_bswap64(dst_reg->var_off);
14471 		/*
14472 		 * Byteswap scrambles the range, so we must reset bounds.
14473 		 * Bounds will be re-derived from the new tnum later.
14474 		 */
14475 		__mark_reg_unbounded(dst_reg);
14476 	}
14477 	/* For bswap16/32, truncate dst register to match the swapped size */
14478 	if (insn->imm == 16 || insn->imm == 32)
14479 		coerce_reg_to_size(dst_reg, insn->imm / 8);
14480 }
14481 
14482 static bool is_safe_to_compute_dst_reg_range(struct bpf_insn *insn,
14483 					     const struct bpf_reg_state *src_reg)
14484 {
14485 	bool src_is_const = false;
14486 	u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;
14487 
14488 	if (insn_bitness == 32) {
14489 		if (tnum_subreg_is_const(src_reg->var_off)
14490 		    && reg_s32_min(src_reg) == reg_s32_max(src_reg)
14491 		    && reg_u32_min(src_reg) == reg_u32_max(src_reg))
14492 			src_is_const = true;
14493 	} else {
14494 		if (tnum_is_const(src_reg->var_off)
14495 		    && reg_smin(src_reg) == reg_smax(src_reg)
14496 		    && reg_umin(src_reg) == reg_umax(src_reg))
14497 			src_is_const = true;
14498 	}
14499 
14500 	switch (BPF_OP(insn->code)) {
14501 	case BPF_ADD:
14502 	case BPF_SUB:
14503 	case BPF_NEG:
14504 	case BPF_AND:
14505 	case BPF_XOR:
14506 	case BPF_OR:
14507 	case BPF_MUL:
14508 	case BPF_END:
14509 		return true;
14510 
14511 	/*
14512 	 * Division and modulo operators range is only safe to compute when the
14513 	 * divisor is a constant.
14514 	 */
14515 	case BPF_DIV:
14516 	case BPF_MOD:
14517 		return src_is_const;
14518 
14519 	/* Shift operators range is only computable if shift dimension operand
14520 	 * is a constant. Shifts greater than 31 or 63 are undefined. This
14521 	 * includes shifts by a negative number.
14522 	 */
14523 	case BPF_LSH:
14524 	case BPF_RSH:
14525 	case BPF_ARSH:
14526 		return (src_is_const && reg_umax(src_reg) < insn_bitness);
14527 	default:
14528 		return false;
14529 	}
14530 }
14531 
14532 static int maybe_fork_scalars(struct bpf_verifier_env *env, struct bpf_insn *insn,
14533 			      struct bpf_reg_state *dst_reg)
14534 {
14535 	struct bpf_verifier_state *branch;
14536 	struct bpf_reg_state *regs;
14537 	bool alu32;
14538 
14539 	if (reg_smin(dst_reg) == -1 && reg_smax(dst_reg) == 0)
14540 		alu32 = false;
14541 	else if (reg_s32_min(dst_reg) == -1 && reg_s32_max(dst_reg) == 0)
14542 		alu32 = true;
14543 	else
14544 		return 0;
14545 
14546 	branch = push_stack(env, env->insn_idx, env->insn_idx, false);
14547 	if (IS_ERR(branch))
14548 		return PTR_ERR(branch);
14549 
14550 	regs = branch->frame[branch->curframe]->regs;
14551 	if (alu32) {
14552 		__mark_reg32_known(&regs[insn->dst_reg], 0);
14553 		__mark_reg32_known(dst_reg, -1ull);
14554 	} else {
14555 		__mark_reg_known(&regs[insn->dst_reg], 0);
14556 		__mark_reg_known(dst_reg, -1ull);
14557 	}
14558 	return 0;
14559 }
14560 
14561 /* WARNING: This function does calculations on 64-bit values, but the actual
14562  * execution may occur on 32-bit values. Therefore, things like bitshifts
14563  * need extra checks in the 32-bit case.
 *
 * Recomputes the bounds and var_off of the scalar @dst_reg for the ALU
 * operation encoded in @insn. @src_reg is passed by value, so it is a
 * private copy of the second operand.
 *
 * Returns 0 on success, otherwise the error produced by sanitize_err() or
 * maybe_fork_scalars().
14564  */
14565 static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
14566 				      struct bpf_insn *insn,
14567 				      struct bpf_reg_state *dst_reg,
14568 				      struct bpf_reg_state src_reg)
14569 {
14570 	u8 opcode = BPF_OP(insn->code);
14571 	s16 off = insn->off;
14572 	bool alu32 = (BPF_CLASS(insn->code) != BPF_ALU64);
14573 	int ret;
14574 
	/* When the result range cannot be computed safely, fall back to an unknown scalar. */
14575 	if (!is_safe_to_compute_dst_reg_range(insn, &src_reg)) {
14576 		__mark_reg_unknown(env, dst_reg);
14577 		return 0;
14578 	}
14579 
14580 	if (sanitize_needed(opcode)) {
14581 		ret = sanitize_val_alu(env, insn);
14582 		if (ret < 0)
14583 			return sanitize_err(env, insn, ret, NULL, NULL);
14584 	}
14585 
14586 	/* Calculate sign/unsigned bounds and tnum for alu32 and alu64 bit ops.
14587 	 * There are two classes of instructions: The first class we track both
14588 	 * alu32 and alu64 sign/unsigned bounds independently this provides the
14589 	 * greatest amount of precision when alu operations are mixed with jmp32
14590 	 * operations. These operations are BPF_ADD, BPF_SUB, BPF_MUL, BPF_AND,
14591 	 * and BPF_OR. This is possible because these ops have fairly easy to
14592 	 * understand and calculate behavior in both 32-bit and 64-bit alu ops.
14593 	 * See alu32 verifier tests for examples. The second class of
14594 	 * operations, BPF_LSH, BPF_RSH, and BPF_ARSH, however are not so easy
14595 	 * with regards to tracking sign/unsigned bounds because the bits may
14596 	 * cross subreg boundaries in the alu64 case. When this happens we mark
14597 	 * the reg unbounded in the subreg bound space and use the resulting
14598 	 * tnum to calculate an approximation of the sign/unsigned bounds.
14599 	 */
14600 	switch (opcode) {
14601 	case BPF_ADD:
14602 		scalar32_min_max_add(dst_reg, &src_reg);
14603 		scalar_min_max_add(dst_reg, &src_reg);
14604 		dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off);
14605 		break;
14606 	case BPF_SUB:
14607 		scalar32_min_max_sub(dst_reg, &src_reg);
14608 		scalar_min_max_sub(dst_reg, &src_reg);
14609 		dst_reg->var_off = tnum_sub(dst_reg->var_off, src_reg.var_off);
14610 		break;
14611 	case BPF_NEG:
		/*
		 * Modelled as 0 - dst: stash the old value in a scratch register,
		 * reset dst_reg to the constant 0 and reuse the subtraction helpers.
		 */
14612 		env->fake_reg[0] = *dst_reg;
14613 		__mark_reg_known(dst_reg, 0);
14614 		scalar32_min_max_sub(dst_reg, &env->fake_reg[0]);
14615 		scalar_min_max_sub(dst_reg, &env->fake_reg[0]);
14616 		dst_reg->var_off = tnum_neg(env->fake_reg[0].var_off);
14617 		break;
14618 	case BPF_MUL:
14619 		dst_reg->var_off = tnum_mul(dst_reg->var_off, src_reg.var_off);
14620 		scalar32_min_max_mul(dst_reg, &src_reg);
14621 		scalar_min_max_mul(dst_reg, &src_reg);
14622 		break;
14623 	case BPF_DIV:
14624 		/* BPF div specification: x / 0 = 0 */
		/*
		 * NOTE(review): the test below is on the *minimum* of src_reg, so it
		 * only means "divisor is zero" if src_reg is a known constant here;
		 * presumably is_safe_to_compute_dst_reg_range() guarantees that -
		 * confirm.
		 */
14625 		if ((alu32 && reg_u32_min(&src_reg) == 0) || (!alu32 && reg_umin(&src_reg) == 0)) {
14626 			___mark_reg_known(dst_reg, 0);
14627 			break;
14628 		}
		/* insn->off == 1 selects the signed helper, anything else the unsigned one. */
14629 		if (alu32)
14630 			if (off == 1)
14631 				scalar32_min_max_sdiv(dst_reg, &src_reg);
14632 			else
14633 				scalar32_min_max_udiv(dst_reg, &src_reg);
14634 		else
14635 			if (off == 1)
14636 				scalar_min_max_sdiv(dst_reg, &src_reg);
14637 			else
14638 				scalar_min_max_udiv(dst_reg, &src_reg);
14639 		break;
14640 	case BPF_MOD:
14641 		/* BPF mod specification: x % 0 = x */
		/* dst_reg is left untouched here (same minimum-based test as BPF_DIV). */
14642 		if ((alu32 && reg_u32_min(&src_reg) == 0) || (!alu32 && reg_umin(&src_reg) == 0))
14643 			break;
		/* insn->off == 1 selects the signed helper, anything else the unsigned one. */
14644 		if (alu32)
14645 			if (off == 1)
14646 				scalar32_min_max_smod(dst_reg, &src_reg);
14647 			else
14648 				scalar32_min_max_umod(dst_reg, &src_reg);
14649 		else
14650 			if (off == 1)
14651 				scalar_min_max_smod(dst_reg, &src_reg);
14652 			else
14653 				scalar_min_max_umod(dst_reg, &src_reg);
14654 		break;
14655 	case BPF_AND:
		/*
		 * Only with a constant operand: give maybe_fork_scalars() a chance
		 * to split the state; its error, if any, is propagated.
		 */
14656 		if (tnum_is_const(src_reg.var_off)) {
14657 			ret = maybe_fork_scalars(env, insn, dst_reg);
14658 			if (ret)
14659 				return ret;
14660 		}
14661 		dst_reg->var_off = tnum_and(dst_reg->var_off, src_reg.var_off);
14662 		scalar32_min_max_and(dst_reg, &src_reg);
14663 		scalar_min_max_and(dst_reg, &src_reg);
14664 		break;
14665 	case BPF_OR:
		/* Same constant-operand forking as for BPF_AND. */
14666 		if (tnum_is_const(src_reg.var_off)) {
14667 			ret = maybe_fork_scalars(env, insn, dst_reg);
14668 			if (ret)
14669 				return ret;
14670 		}
14671 		dst_reg->var_off = tnum_or(dst_reg->var_off, src_reg.var_off);
14672 		scalar32_min_max_or(dst_reg, &src_reg);
14673 		scalar_min_max_or(dst_reg, &src_reg);
14674 		break;
14675 	case BPF_XOR:
14676 		dst_reg->var_off = tnum_xor(dst_reg->var_off, src_reg.var_off);
14677 		scalar32_min_max_xor(dst_reg, &src_reg);
14678 		scalar_min_max_xor(dst_reg, &src_reg);
14679 		break;
	/* Shifts call only the helper that matches the instruction width. */
14680 	case BPF_LSH:
14681 		if (alu32)
14682 			scalar32_min_max_lsh(dst_reg, &src_reg);
14683 		else
14684 			scalar_min_max_lsh(dst_reg, &src_reg);
14685 		break;
14686 	case BPF_RSH:
14687 		if (alu32)
14688 			scalar32_min_max_rsh(dst_reg, &src_reg);
14689 		else
14690 			scalar_min_max_rsh(dst_reg, &src_reg);
14691 		break;
14692 	case BPF_ARSH:
14693 		if (alu32)
14694 			scalar32_min_max_arsh(dst_reg, &src_reg);
14695 		else
14696 			scalar_min_max_arsh(dst_reg, &src_reg);
14697 		break;
14698 	case BPF_END:
14699 		scalar_byte_swap(dst_reg, insn);
14700 		break;
14701 	default:
14702 		break;
14703 	}
14704 
14705 	/*
14706 	 * ALU32 ops are zero extended into 64bit register.
14707 	 *
14708 	 * BPF_END is already handled inside the helper (truncation),
14709 	 * so skip zext here to avoid unexpected zero extension.
14710 	 * e.g., le64: opcode=(BPF_END|BPF_ALU|BPF_TO_LE), imm=0x40
14711 	 * This is a 64bit byte swap operation with alu32==true,
14712 	 * but we should not zero extend the result.
14713 	 */
14714 	if (alu32 && opcode != BPF_END)
14715 		zext_32_to_64(dst_reg);
	/* Reconcile the different bound representations after the update. */
14716 	reg_bounds_sync(dst_reg);
14717 	return 0;
14718 }
14719 
14720 /* Handles ALU ops other than BPF_END, BPF_NEG and BPF_MOV: computes new min/max
14721  * and var_off.
 *
 * Dispatch order:
 *  - an arena pointer operand: any arithmetic is accepted, nothing is tracked;
 *  - pointer <op> pointer: rejected with -EACCES, except BPF_SUB when
 *    env->allow_ptr_leaks is set, which yields an unknown scalar;
 *  - scalar/pointer mixes: delegated to adjust_ptr_min_max_vals();
 *  - scalar <op> scalar: adjust_scalar_min_max_vals(), followed by the
 *    bookkeeping of the "constant delta" link between registers.
 *
 * Returns 0 on success, -EACCES/-EFAULT for the cases rejected here, or the
 * error propagated from a callee.
14722  */
14723 static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
14724 				   struct bpf_insn *insn)
14725 {
14726 	struct bpf_verifier_state *vstate = env->cur_state;
14727 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
14728 	struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg;
14729 	struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
14730 	bool alu32 = (BPF_CLASS(insn->code) != BPF_ALU64);
14731 	u8 opcode = BPF_OP(insn->code);
14732 	int err;
14733 
	/* src_reg stays NULL for immediate (BPF_K) operands until a fake one is built below. */
14734 	dst_reg = &regs[insn->dst_reg];
14735 	if (BPF_SRC(insn->code) == BPF_X)
14736 		src_reg = &regs[insn->src_reg];
14737 	else
14738 		src_reg = NULL;
14739 
14740 	/* Case where at least one operand is an arena. */
14741 	if (dst_reg->type == PTR_TO_ARENA || (src_reg && src_reg->type == PTR_TO_ARENA)) {
14742 		struct bpf_insn_aux_data *aux = cur_aux(env);
14743 
		/* Only the source is an arena pointer: the result takes over its state. */
14744 		if (dst_reg->type != PTR_TO_ARENA)
14745 			*dst_reg = *src_reg;
14746 
14747 		dst_reg->subreg_def = env->insn_idx + 1;
14748 
14749 		if (BPF_CLASS(insn->code) == BPF_ALU64)
14750 			/*
14751 			 * 32-bit operations zero upper bits automatically.
14752 			 * 64-bit operations need to be converted to 32.
14753 			 */
14754 			aux->needs_zext = true;
14755 
14756 		/* Any arithmetic operations are allowed on arena pointers */
14757 		return 0;
14758 	}
14759 
14760 	if (dst_reg->type != SCALAR_VALUE)
14761 		ptr_reg = dst_reg;
14762 
14763 	if (BPF_SRC(insn->code) == BPF_X) {
14764 		if (src_reg->type != SCALAR_VALUE) {
14765 			if (dst_reg->type != SCALAR_VALUE) {
14766 				/* Combining two pointers by any ALU op yields
14767 				 * an arbitrary scalar. Disallow all math except
14768 				 * pointer subtraction
14769 				 */
14770 				if (opcode == BPF_SUB && env->allow_ptr_leaks) {
14771 					mark_reg_unknown(env, regs, insn->dst_reg);
14772 					return 0;
14773 				}
14774 				verbose(env, "R%d pointer %s pointer prohibited\n",
14775 					insn->dst_reg,
14776 					bpf_alu_string[opcode >> 4]);
14777 				return -EACCES;
14778 			} else {
14779 				/* scalar += pointer
14780 				 * This is legal, but we have to reverse our
14781 				 * src/dest handling in computing the range
14782 				 */
14783 				err = mark_chain_precision(env, insn->dst_reg);
14784 				if (err)
14785 					return err;
14786 				return adjust_ptr_min_max_vals(env, insn,
14787 							       src_reg, dst_reg);
14788 			}
14789 		} else if (ptr_reg) {
14790 			/* pointer += scalar */
14791 			err = mark_chain_precision(env, insn->src_reg);
14792 			if (err)
14793 				return err;
14794 			return adjust_ptr_min_max_vals(env, insn,
14795 						       dst_reg, src_reg);
14796 		} else if (dst_reg->precise) {
14797 			/* if dst_reg is precise, src_reg should be precise as well */
14798 			err = mark_chain_precision(env, insn->src_reg);
14799 			if (err)
14800 				return err;
14801 		}
14802 	} else {
14803 		/* Pretend the src is a reg with a known value, since we only
14804 		 * need to be able to read from this state.
14805 		 */
14806 		off_reg.type = SCALAR_VALUE;
14807 		__mark_reg_known(&off_reg, insn->imm);
14808 		src_reg = &off_reg;
14809 		if (ptr_reg) /* pointer += K */
14810 			return adjust_ptr_min_max_vals(env, insn,
14811 						       ptr_reg, src_reg);
14812 	}
14813 
14814 	/* Got here implies adding two SCALAR_VALUEs */
	/* Both checks below guard against internal inconsistencies, not user errors. */
14815 	if (WARN_ON_ONCE(ptr_reg)) {
14816 		print_verifier_state(env, vstate, vstate->curframe, true);
14817 		verbose(env, "verifier internal error: unexpected ptr_reg\n");
14818 		return -EFAULT;
14819 	}
14820 	if (WARN_ON(!src_reg)) {
14821 		print_verifier_state(env, vstate, vstate->curframe, true);
14822 		verbose(env, "verifier internal error: no src_reg\n");
14823 		return -EFAULT;
14824 	}
14825 	/*
14826 	 * For alu32 linked register tracking, we need to check dst_reg's
14827 	 * umax_value before the ALU operation. After adjust_scalar_min_max_vals(),
14828 	 * alu32 ops will have zero-extended the result, making umax_value <= U32_MAX.
14829 	 */
14830 	u64 dst_umax = reg_umax(dst_reg);
14831 
14832 	err = adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg);
14833 	if (err)
14834 		return err;
14835 	/*
14836 	 * Compilers can generate the code
14837 	 * r1 = r2
14838 	 * r1 += 0x1
14839 	 * if r2 < 1000 goto ...
14840 	 * use r1 in memory access
14841 	 * So remember constant delta between r2 and r1 and update r1 after
14842 	 * 'if' condition.
	 *
	 * The delta is only recorded for bpf_capable programs, for BPF_ADD or
	 * BPF_SUB with a constant operand, when dst_reg already carries an id
	 * and the instruction is not of the form rX += rX / rX -= rX.
14843 	 */
14844 	if (env->bpf_capable &&
14845 	    (BPF_OP(insn->code) == BPF_ADD || BPF_OP(insn->code) == BPF_SUB) &&
14846 	    dst_reg->id && is_reg_const(src_reg, alu32) &&
14847 	    !(BPF_SRC(insn->code) == BPF_X && insn->src_reg == insn->dst_reg)) {
14848 		u64 val = reg_const_value(src_reg, alu32);
14849 		s32 off;
14850 
		/* The delta is kept as an s32, so a 64-bit constant must fit in that range. */
14851 		if (!alu32 && ((s64)val < S32_MIN || (s64)val > S32_MAX))
14852 			goto clear_id;
14853 
		/* A 32-bit op on a value that did not fit in 32 bits beforehand breaks the link. */
14854 		if (alu32 && (dst_umax > U32_MAX))
14855 			goto clear_id;
14856 
14857 		off = (s32)val;
14858 
14859 		if (BPF_OP(insn->code) == BPF_SUB) {
14860 			/* Negating S32_MIN would overflow */
14861 			if (off == S32_MIN)
14862 				goto clear_id;
14863 			off = -off;
14864 		}
14865 
14866 		if (dst_reg->id & BPF_ADD_CONST) {
14867 			/*
14868 			 * If the register already went through rX += val
14869 			 * we cannot accumulate another val into rx->off.
14870 			 */
			/* The gotos above also land here, inside this branch. */
14871 clear_id:
14872 			clear_scalar_id(dst_reg);
14873 		} else {
14874 			if (alu32)
14875 				dst_reg->id |= BPF_ADD_CONST32;
14876 			else
14877 				dst_reg->id |= BPF_ADD_CONST64;
14878 			dst_reg->delta = off;
14879 		}
14880 	} else {
14881 		/*
14882 		 * Make sure ID is cleared otherwise dst_reg min/max could be
14883 		 * incorrectly propagated into other registers by sync_linked_regs()
14884 		 */
14885 		clear_scalar_id(dst_reg);
14886 	}
14887 	return 0;
14888 }
14889 
14890 /* check validity of 32-bit and 64-bit arithmetic operations
 *
 * Three instruction families are handled:
 *  - BPF_END / BPF_NEG: single-operand ops where dst_reg is both source and
 *    destination;
 *  - BPF_MOV: register-to-register copies (plain, sign-extending, 32-bit,
 *    address-space cast) and immediate loads;
 *  - every other ALU op: operand checks followed by adjust_reg_min_max_vals().
 *
 * Returns 0 on success, -EACCES/-EINVAL for the cases rejected here, or the
 * error propagated from a callee, including the final
 * reg_bounds_sanity_check() on the destination register.
 */
14891 static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
14892 {
14893 	struct bpf_reg_state *regs = cur_regs(env);
14894 	u8 opcode = BPF_OP(insn->code);
14895 	int err;
14896 
14897 	if (opcode == BPF_END || opcode == BPF_NEG) {
14898 		/* check src operand */
14899 		err = check_reg_arg(env, insn->dst_reg, SRC_OP);
14900 		if (err)
14901 			return err;
14902 
14903 		if (is_pointer_value(env, insn->dst_reg)) {
14904 			verbose(env, "R%d pointer arithmetic prohibited\n",
14905 				insn->dst_reg);
14906 			return -EACCES;
14907 		}
14908 
14909 		/* check dest operand */
		/*
		 * Scalars get their bounds recomputed; the register is passed both
		 * as destination (by pointer) and as source (by value).
		 */
14910 		if (regs[insn->dst_reg].type == SCALAR_VALUE) {
14911 			err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
14912 			err = err ?: adjust_scalar_min_max_vals(env, insn,
14913 							 &regs[insn->dst_reg],
14914 							 regs[insn->dst_reg]);
14915 		} else {
14916 			err = check_reg_arg(env, insn->dst_reg, DST_OP);
14917 		}
14918 		if (err)
14919 			return err;
14920 
14921 	} else if (opcode == BPF_MOV) {
14922 
14923 		if (BPF_SRC(insn->code) == BPF_X) {
14924 			if (insn->off == BPF_ADDR_SPACE_CAST) {
14925 				if (!env->prog->aux->arena) {
14926 					verbose(env, "addr_space_cast insn can only be used in a program that has an associated arena\n");
14927 					return -EINVAL;
14928 				}
14929 			}
14930 
14931 			/* check src operand */
14932 			err = check_reg_arg(env, insn->src_reg, SRC_OP);
14933 			if (err)
14934 				return err;
14935 		}
14936 
14937 		/* check dest operand, mark as required later */
14938 		err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
14939 		if (err)
14940 			return err;
14941 
14942 		if (BPF_SRC(insn->code) == BPF_X) {
14943 			struct bpf_reg_state *src_reg = regs + insn->src_reg;
14944 			struct bpf_reg_state *dst_reg = regs + insn->dst_reg;
14945 
14946 			if (BPF_CLASS(insn->code) == BPF_ALU64) {
14947 				if (insn->imm) {
14948 					/* off == BPF_ADDR_SPACE_CAST */
14949 					mark_reg_unknown(env, regs, insn->dst_reg);
14950 					if (insn->imm == 1) { /* cast from as(1) to as(0) */
14951 						dst_reg->type = PTR_TO_ARENA;
14952 						/* PTR_TO_ARENA is 32-bit */
14953 						dst_reg->subreg_def = env->insn_idx + 1;
14954 					}
14955 				} else if (insn->off == 0) {
14956 					/* case: R1 = R2
14957 					 * copy register state to dest reg
14958 					 */
14959 					assign_scalar_id_before_mov(env, src_reg);
14960 					*dst_reg = *src_reg;
14961 					dst_reg->subreg_def = DEF_NOT_SUBREG;
14962 				} else {
14963 					/* case: R1 = (s8, s16 s32)R2 */
14964 					if (is_pointer_value(env, insn->src_reg)) {
14965 						verbose(env,
14966 							"R%d sign-extension part of pointer\n",
14967 							insn->src_reg);
14968 						return -EACCES;
14969 					} else if (src_reg->type == SCALAR_VALUE) {
14970 						bool no_sext;
14971 
						/*
						 * no_sext: every possible source value is below
						 * 1 << (off - 1), i.e. the sign bit of the
						 * off-bit quantity is clear. Only then does the
						 * copy keep its scalar id link to src_reg.
						 */
14972 						no_sext = reg_umax(src_reg) < (1ULL << (insn->off - 1));
14973 						if (no_sext)
14974 							assign_scalar_id_before_mov(env, src_reg);
14975 						*dst_reg = *src_reg;
14976 						if (!no_sext)
14977 							clear_scalar_id(dst_reg);
14978 						coerce_reg_to_size_sx(dst_reg, insn->off >> 3);
14979 						dst_reg->subreg_def = DEF_NOT_SUBREG;
14980 					} else {
14981 						mark_reg_unknown(env, regs, insn->dst_reg);
14982 					}
14983 				}
14984 			} else {
14985 				/* R1 = (u32) R2 */
14986 				if (is_pointer_value(env, insn->src_reg)) {
14987 					verbose(env,
14988 						"R%d partial copy of pointer\n",
14989 						insn->src_reg);
14990 					return -EACCES;
14991 				} else if (src_reg->type == SCALAR_VALUE) {
14992 					if (insn->off == 0) {
14993 						bool is_src_reg_u32 = get_reg_width(src_reg) <= 32;
14994 
14995 						if (is_src_reg_u32)
14996 							assign_scalar_id_before_mov(env, src_reg);
14997 						*dst_reg = *src_reg;
14998 						/* Make sure ID is cleared if src_reg is not in u32
14999 						 * range otherwise dst_reg min/max could be incorrectly
15000 						 * propagated into src_reg by sync_linked_regs()
15001 						 */
15002 						if (!is_src_reg_u32)
15003 							clear_scalar_id(dst_reg);
15004 						dst_reg->subreg_def = env->insn_idx + 1;
15005 					} else {
15006 						/* case: W1 = (s8, s16)W2 */
						/* Same no_sext rule as the 64-bit sign-extending move above. */
15007 						bool no_sext = reg_umax(src_reg) < (1ULL << (insn->off - 1));
15008 
15009 						if (no_sext)
15010 							assign_scalar_id_before_mov(env, src_reg);
15011 						*dst_reg = *src_reg;
15012 						if (!no_sext)
15013 							clear_scalar_id(dst_reg);
15014 						dst_reg->subreg_def = env->insn_idx + 1;
15015 						coerce_subreg_to_size_sx(dst_reg, insn->off >> 3);
15016 					}
15017 				} else {
15018 					mark_reg_unknown(env, regs,
15019 							 insn->dst_reg);
15020 				}
				/* Every 32-bit move ends with zero extension and a bounds resync. */
15021 				zext_32_to_64(dst_reg);
15022 				reg_bounds_sync(dst_reg);
15023 			}
15024 		} else {
15025 			/* case: R = imm
15026 			 * remember the value we stored into this reg
15027 			 */
15028 			/* clear any state __mark_reg_known doesn't set */
15029 			mark_reg_unknown(env, regs, insn->dst_reg);
15030 			regs[insn->dst_reg].type = SCALAR_VALUE;
			/* The (u32) cast keeps a negative imm from being sign-extended in the 32-bit form. */
15031 			if (BPF_CLASS(insn->code) == BPF_ALU64) {
15032 				__mark_reg_known(regs + insn->dst_reg,
15033 						 insn->imm);
15034 			} else {
15035 				__mark_reg_known(regs + insn->dst_reg,
15036 						 (u32)insn->imm);
15037 			}
15038 		}
15039 
15040 	} else {	/* all other ALU ops: and, sub, xor, add, ... */
15041 
15042 		if (BPF_SRC(insn->code) == BPF_X) {
15043 			/* check src1 operand */
15044 			err = check_reg_arg(env, insn->src_reg, SRC_OP);
15045 			if (err)
15046 				return err;
15047 		}
15048 
15049 		/* check src2 operand */
15050 		err = check_reg_arg(env, insn->dst_reg, SRC_OP);
15051 		if (err)
15052 			return err;
15053 
		/* Division or modulo by an immediate zero is rejected outright. */
15054 		if ((opcode == BPF_MOD || opcode == BPF_DIV) &&
15055 		    BPF_SRC(insn->code) == BPF_K && insn->imm == 0) {
15056 			verbose(env, "div by zero\n");
15057 			return -EINVAL;
15058 		}
15059 
		/* Immediate shift counts must lie in [0, operand width). */
15060 		if ((opcode == BPF_LSH || opcode == BPF_RSH ||
15061 		     opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) {
15062 			int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32;
15063 
15064 			if (insn->imm < 0 || insn->imm >= size) {
15065 				verbose(env, "invalid shift %d\n", insn->imm);
15066 				return -EINVAL;
15067 			}
15068 		}
15069 
15070 		/* check dest operand */
15071 		err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
15072 		err = err ?: adjust_reg_min_max_vals(env, insn);
15073 		if (err)
15074 			return err;
15075 	}
15076 
15077 	return reg_bounds_sanity_check(env, &regs[insn->dst_reg], "alu");
15078 }
15079 
15080 static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
15081 				   struct bpf_reg_state *dst_reg,
15082 				   enum bpf_reg_type type,
15083 				   bool range_right_open)
15084 {
15085 	struct bpf_func_state *state;
15086 	struct bpf_reg_state *reg;
15087 	int new_range;
15088 
15089 	if (reg_umax(dst_reg) == 0 && range_right_open)
15090 		/* This doesn't give us any range */
15091 		return;
15092 
15093 	if (reg_umax(dst_reg) > MAX_PACKET_OFF)
15094 		/* Risk of overflow.  For instance, ptr + (1<<63) may be less
15095 		 * than pkt_end, but that's because it's also less than pkt.
15096 		 */
15097 		return;
15098 
15099 	new_range = reg_umax(dst_reg);
15100 	if (range_right_open)
15101 		new_range++;
15102 
15103 	/* Examples for register markings:
15104 	 *
15105 	 * pkt_data in dst register:
15106 	 *
15107 	 *   r2 = r3;
15108 	 *   r2 += 8;
15109 	 *   if (r2 > pkt_end) goto <handle exception>
15110 	 *   <access okay>
15111 	 *
15112 	 *   r2 = r3;
15113 	 *   r2 += 8;
15114 	 *   if (r2 < pkt_end) goto <access okay>
15115 	 *   <handle exception>
15116 	 *
15117 	 *   Where:
15118 	 *     r2 == dst_reg, pkt_end == src_reg
15119 	 *     r2=pkt(id=n,off=8,r=0)
15120 	 *     r3=pkt(id=n,off=0,r=0)
15121 	 *
15122 	 * pkt_data in src register:
15123 	 *
15124 	 *   r2 = r3;
15125 	 *   r2 += 8;
15126 	 *   if (pkt_end >= r2) goto <access okay>
15127 	 *   <handle exception>
15128 	 *
15129 	 *   r2 = r3;
15130 	 *   r2 += 8;
15131 	 *   if (pkt_end <= r2) goto <handle exception>
15132 	 *   <access okay>
15133 	 *
15134 	 *   Where:
15135 	 *     pkt_end == dst_reg, r2 == src_reg
15136 	 *     r2=pkt(id=n,off=8,r=0)
15137 	 *     r3=pkt(id=n,off=0,r=0)
15138 	 *
15139 	 * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
15140 	 * or r3=pkt(id=n,off=0,r=8-1), so that range of bytes [r3, r3 + 8)
15141 	 * and [r3, r3 + 8-1) respectively is safe to access depending on
15142 	 * the check.
15143 	 */
15144 
15145 	/* If our ids match, then we must have the same max_value.  And we
15146 	 * don't care about the other reg's fixed offset, since if it's too big
15147 	 * the range won't allow anything.
15148 	 * reg_umax(dst_reg) is known < MAX_PACKET_OFF, therefore it fits in a u16.
15149 	 */
15150 	bpf_for_each_reg_in_vstate(vstate, state, reg, ({
15151 		if (reg->type == type && reg->id == dst_reg->id)
15152 			/* keep the maximum range already checked */
15153 			reg->range = max(reg->range, new_range);
15154 	}));
15155 }
15156 
/*
 * Forward declarations: both functions are defined further down in this file
 * but are already needed by simulate_both_branches_taken().
 */
15157 static void regs_refine_cond_op(struct bpf_reg_state *reg1, struct bpf_reg_state *reg2,
15158 				u8 opcode, bool is_jmp32);
15159 static u8 rev_opcode(u8 opcode);
15160 
15161 /*
15162  * Learn more information about live branches by simulating refinement on both branches.
15163  * regs_refine_cond_op() is sound, so producing ill-formed register bounds for the branch means
15164  * that branch is dead.
15165  */
15166 static int simulate_both_branches_taken(struct bpf_verifier_env *env, u8 opcode, bool is_jmp32)
15167 {
15168 	/* Fallthrough (FALSE) branch */
15169 	regs_refine_cond_op(&env->false_reg1, &env->false_reg2, rev_opcode(opcode), is_jmp32);
15170 	reg_bounds_sync(&env->false_reg1);
15171 	reg_bounds_sync(&env->false_reg2);
15172 	/*
15173 	 * If there is a range bounds violation in *any* of the abstract values in either
15174 	 * reg_states in the FALSE branch (i.e. reg1, reg2), the FALSE branch must be dead. Only
15175 	 * TRUE branch will be taken.
15176 	 */
15177 	if (range_bounds_violation(&env->false_reg1) || range_bounds_violation(&env->false_reg2))
15178 		return 1;
15179 
15180 	/* Jump (TRUE) branch */
15181 	regs_refine_cond_op(&env->true_reg1, &env->true_reg2, opcode, is_jmp32);
15182 	reg_bounds_sync(&env->true_reg1);
15183 	reg_bounds_sync(&env->true_reg2);
15184 	/*
15185 	 * If there is a range bounds violation in *any* of the abstract values in either
15186 	 * reg_states in the TRUE branch (i.e. true_reg1, true_reg2), the TRUE branch must be dead.
15187 	 * Only FALSE branch will be taken.
15188 	 */
15189 	if (range_bounds_violation(&env->true_reg1) || range_bounds_violation(&env->true_reg2))
15190 		return 0;
15191 
15192 	/* Both branches are possible, we can't determine which one will be taken. */
15193 	return -1;
15194 }
15195 
15196 /*
15197  * <reg1> <op> <reg2>, currently assuming reg2 is a constant
15198  */
15199 static int is_scalar_branch_taken(struct bpf_verifier_env *env, struct bpf_reg_state *reg1,
15200 				  struct bpf_reg_state *reg2, u8 opcode, bool is_jmp32)
15201 {
15202 	struct tnum t1 = is_jmp32 ? tnum_subreg(reg1->var_off) : reg1->var_off;
15203 	struct tnum t2 = is_jmp32 ? tnum_subreg(reg2->var_off) : reg2->var_off;
15204 	u64 umin1 = is_jmp32 ? (u64)reg_u32_min(reg1) : reg_umin(reg1);
15205 	u64 umax1 = is_jmp32 ? (u64)reg_u32_max(reg1) : reg_umax(reg1);
15206 	s64 smin1 = is_jmp32 ? (s64)reg_s32_min(reg1) : reg_smin(reg1);
15207 	s64 smax1 = is_jmp32 ? (s64)reg_s32_max(reg1) : reg_smax(reg1);
15208 	u64 umin2 = is_jmp32 ? (u64)reg_u32_min(reg2) : reg_umin(reg2);
15209 	u64 umax2 = is_jmp32 ? (u64)reg_u32_max(reg2) : reg_umax(reg2);
15210 	s64 smin2 = is_jmp32 ? (s64)reg_s32_min(reg2) : reg_smin(reg2);
15211 	s64 smax2 = is_jmp32 ? (s64)reg_s32_max(reg2) : reg_smax(reg2);
15212 
15213 	if (reg1 == reg2) {
15214 		switch (opcode) {
15215 		case BPF_JGE:
15216 		case BPF_JLE:
15217 		case BPF_JSGE:
15218 		case BPF_JSLE:
15219 		case BPF_JEQ:
15220 			return 1;
15221 		case BPF_JGT:
15222 		case BPF_JLT:
15223 		case BPF_JSGT:
15224 		case BPF_JSLT:
15225 		case BPF_JNE:
15226 			return 0;
15227 		case BPF_JSET:
15228 			if (tnum_is_const(t1))
15229 				return t1.value != 0;
15230 			else
15231 				return (smin1 <= 0 && smax1 >= 0) ? -1 : 1;
15232 		default:
15233 			return -1;
15234 		}
15235 	}
15236 
15237 	switch (opcode) {
15238 	case BPF_JEQ:
15239 		/* constants, umin/umax and smin/smax checks would be
15240 		 * redundant in this case because they all should match
15241 		 */
15242 		if (tnum_is_const(t1) && tnum_is_const(t2))
15243 			return t1.value == t2.value;
15244 		if (!tnum_overlap(t1, t2))
15245 			return 0;
15246 		/* non-overlapping ranges */
15247 		if (umin1 > umax2 || umax1 < umin2)
15248 			return 0;
15249 		if (smin1 > smax2 || smax1 < smin2)
15250 			return 0;
15251 		if (!is_jmp32) {
15252 			/* if 64-bit ranges are inconclusive, see if we can
15253 			 * utilize 32-bit subrange knowledge to eliminate
15254 			 * branches that can't be taken a priori
15255 			 */
15256 			if (reg_u32_min(reg1) > reg_u32_max(reg2) ||
15257 			    reg_u32_max(reg1) < reg_u32_min(reg2))
15258 				return 0;
15259 			if (reg_s32_min(reg1) > reg_s32_max(reg2) ||
15260 			    reg_s32_max(reg1) < reg_s32_min(reg2))
15261 				return 0;
15262 		}
15263 		break;
15264 	case BPF_JNE:
15265 		/* constants, umin/umax and smin/smax checks would be
15266 		 * redundant in this case because they all should match
15267 		 */
15268 		if (tnum_is_const(t1) && tnum_is_const(t2))
15269 			return t1.value != t2.value;
15270 		if (!tnum_overlap(t1, t2))
15271 			return 1;
15272 		/* non-overlapping ranges */
15273 		if (umin1 > umax2 || umax1 < umin2)
15274 			return 1;
15275 		if (smin1 > smax2 || smax1 < smin2)
15276 			return 1;
15277 		if (!is_jmp32) {
15278 			/* if 64-bit ranges are inconclusive, see if we can
15279 			 * utilize 32-bit subrange knowledge to eliminate
15280 			 * branches that can't be taken a priori
15281 			 */
15282 			if (reg_u32_min(reg1) > reg_u32_max(reg2) ||
15283 			    reg_u32_max(reg1) < reg_u32_min(reg2))
15284 				return 1;
15285 			if (reg_s32_min(reg1) > reg_s32_max(reg2) ||
15286 			    reg_s32_max(reg1) < reg_s32_min(reg2))
15287 				return 1;
15288 		}
15289 		break;
15290 	case BPF_JSET:
15291 		if (!is_reg_const(reg2, is_jmp32)) {
15292 			swap(reg1, reg2);
15293 			swap(t1, t2);
15294 		}
15295 		if (!is_reg_const(reg2, is_jmp32))
15296 			return -1;
15297 		if ((~t1.mask & t1.value) & t2.value)
15298 			return 1;
15299 		if (!((t1.mask | t1.value) & t2.value))
15300 			return 0;
15301 		break;
15302 	case BPF_JGT:
15303 		if (umin1 > umax2)
15304 			return 1;
15305 		else if (umax1 <= umin2)
15306 			return 0;
15307 		break;
15308 	case BPF_JSGT:
15309 		if (smin1 > smax2)
15310 			return 1;
15311 		else if (smax1 <= smin2)
15312 			return 0;
15313 		break;
15314 	case BPF_JLT:
15315 		if (umax1 < umin2)
15316 			return 1;
15317 		else if (umin1 >= umax2)
15318 			return 0;
15319 		break;
15320 	case BPF_JSLT:
15321 		if (smax1 < smin2)
15322 			return 1;
15323 		else if (smin1 >= smax2)
15324 			return 0;
15325 		break;
15326 	case BPF_JGE:
15327 		if (umin1 >= umax2)
15328 			return 1;
15329 		else if (umax1 < umin2)
15330 			return 0;
15331 		break;
15332 	case BPF_JSGE:
15333 		if (smin1 >= smax2)
15334 			return 1;
15335 		else if (smax1 < smin2)
15336 			return 0;
15337 		break;
15338 	case BPF_JLE:
15339 		if (umax1 <= umin2)
15340 			return 1;
15341 		else if (umin1 > umax2)
15342 			return 0;
15343 		break;
15344 	case BPF_JSLE:
15345 		if (smax1 <= smin2)
15346 			return 1;
15347 		else if (smin1 > smax2)
15348 			return 0;
15349 		break;
15350 	}
15351 
15352 	return simulate_both_branches_taken(env, opcode, is_jmp32);
15353 }
15354 
15355 static int flip_opcode(u32 opcode)
15356 {
15357 	/* How can we transform "a <op> b" into "b <op> a"? */
15358 	static const u8 opcode_flip[16] = {
15359 		/* these stay the same */
15360 		[BPF_JEQ  >> 4] = BPF_JEQ,
15361 		[BPF_JNE  >> 4] = BPF_JNE,
15362 		[BPF_JSET >> 4] = BPF_JSET,
15363 		/* these swap "lesser" and "greater" (L and G in the opcodes) */
15364 		[BPF_JGE  >> 4] = BPF_JLE,
15365 		[BPF_JGT  >> 4] = BPF_JLT,
15366 		[BPF_JLE  >> 4] = BPF_JGE,
15367 		[BPF_JLT  >> 4] = BPF_JGT,
15368 		[BPF_JSGE >> 4] = BPF_JSLE,
15369 		[BPF_JSGT >> 4] = BPF_JSLT,
15370 		[BPF_JSLE >> 4] = BPF_JSGE,
15371 		[BPF_JSLT >> 4] = BPF_JSGT
15372 	};
15373 	return opcode_flip[opcode >> 4];
15374 }
15375 
15376 static int is_pkt_ptr_branch_taken(struct bpf_reg_state *dst_reg,
15377 				   struct bpf_reg_state *src_reg,
15378 				   u8 opcode)
15379 {
15380 	struct bpf_reg_state *pkt;
15381 
15382 	if (src_reg->type == PTR_TO_PACKET_END) {
15383 		pkt = dst_reg;
15384 	} else if (dst_reg->type == PTR_TO_PACKET_END) {
15385 		pkt = src_reg;
15386 		opcode = flip_opcode(opcode);
15387 	} else {
15388 		return -1;
15389 	}
15390 
15391 	if (pkt->range >= 0)
15392 		return -1;
15393 
15394 	switch (opcode) {
15395 	case BPF_JLE:
15396 		/* pkt <= pkt_end */
15397 		fallthrough;
15398 	case BPF_JGT:
15399 		/* pkt > pkt_end */
15400 		if (pkt->range == BEYOND_PKT_END)
15401 			/* pkt has at last one extra byte beyond pkt_end */
15402 			return opcode == BPF_JGT;
15403 		break;
15404 	case BPF_JLT:
15405 		/* pkt < pkt_end */
15406 		fallthrough;
15407 	case BPF_JGE:
15408 		/* pkt >= pkt_end */
15409 		if (pkt->range == BEYOND_PKT_END || pkt->range == AT_PKT_END)
15410 			return opcode == BPF_JGE;
15411 		break;
15412 	}
15413 	return -1;
15414 }
15415 
15416 /* compute branch direction of the expression "if (<reg1> opcode <reg2>) goto target;"
15417  * and return:
15418  *  1 - branch will be taken and "goto target" will be executed
15419  *  0 - branch will not be taken and fall-through to next insn
15420  * -1 - unknown. Example: "if (reg1 < 5)" is unknown when register value
15421  *      range [0,10]
15422  */
15423 static int is_branch_taken(struct bpf_verifier_env *env, struct bpf_reg_state *reg1,
15424 			   struct bpf_reg_state *reg2, u8 opcode, bool is_jmp32)
15425 {
15426 	if (reg_is_pkt_pointer_any(reg1) && reg_is_pkt_pointer_any(reg2) && !is_jmp32)
15427 		return is_pkt_ptr_branch_taken(reg1, reg2, opcode);
15428 
15429 	if (__is_pointer_value(false, reg1) || __is_pointer_value(false, reg2)) {
15430 		u64 val;
15431 
15432 		/* arrange that reg2 is a scalar, and reg1 is a pointer */
15433 		if (!is_reg_const(reg2, is_jmp32)) {
15434 			opcode = flip_opcode(opcode);
15435 			swap(reg1, reg2);
15436 		}
15437 		/* and ensure that reg2 is a constant */
15438 		if (!is_reg_const(reg2, is_jmp32))
15439 			return -1;
15440 
15441 		if (!reg_not_null(env, reg1))
15442 			return -1;
15443 
15444 		/* If pointer is valid tests against zero will fail so we can
15445 		 * use this to direct branch taken.
15446 		 */
15447 		val = reg_const_value(reg2, is_jmp32);
15448 		if (val != 0)
15449 			return -1;
15450 
15451 		switch (opcode) {
15452 		case BPF_JEQ:
15453 			return 0;
15454 		case BPF_JNE:
15455 			return 1;
15456 		default:
15457 			return -1;
15458 		}
15459 	}
15460 
15461 	/* now deal with two scalars, but not necessarily constants */
15462 	return is_scalar_branch_taken(env, reg1, reg2, opcode, is_jmp32);
15463 }
15464 
15465 /* Opcode that corresponds to a *false* branch condition.
15466  * E.g., if r1 < r2, then reverse (false) condition is r1 >= r2
15467  */
15468 static u8 rev_opcode(u8 opcode)
15469 {
15470 	switch (opcode) {
15471 	case BPF_JEQ:		return BPF_JNE;
15472 	case BPF_JNE:		return BPF_JEQ;
15473 	/* JSET doesn't have it's reverse opcode in BPF, so add
15474 	 * BPF_X flag to denote the reverse of that operation
15475 	 */
15476 	case BPF_JSET:		return BPF_JSET | BPF_X;
15477 	case BPF_JSET | BPF_X:	return BPF_JSET;
15478 	case BPF_JGE:		return BPF_JLT;
15479 	case BPF_JGT:		return BPF_JLE;
15480 	case BPF_JLE:		return BPF_JGT;
15481 	case BPF_JLT:		return BPF_JGE;
15482 	case BPF_JSGE:		return BPF_JSLT;
15483 	case BPF_JSGT:		return BPF_JSLE;
15484 	case BPF_JSLE:		return BPF_JSGT;
15485 	case BPF_JSLT:		return BPF_JSGE;
15486 	default:		return 0;
15487 	}
15488 }
15489 
15490 /* Refine range knowledge for <reg1> <op> <reg2> conditional operation. */
15491 static void regs_refine_cond_op(struct bpf_reg_state *reg1, struct bpf_reg_state *reg2,
15492 				u8 opcode, bool is_jmp32)
15493 {
15494 	struct tnum t;
15495 	u64 val;
15496 
15497 	/* In case of GE/GT/SGE/JST, reuse LE/LT/SLE/SLT logic from below */
15498 	switch (opcode) {
15499 	case BPF_JGE:
15500 	case BPF_JGT:
15501 	case BPF_JSGE:
15502 	case BPF_JSGT:
15503 		opcode = flip_opcode(opcode);
15504 		swap(reg1, reg2);
15505 		break;
15506 	default:
15507 		break;
15508 	}
15509 
15510 	switch (opcode) {
15511 	case BPF_JEQ:
15512 		if (is_jmp32) {
15513 			reg1->r32 = cnum32_intersect(reg1->r32, reg2->r32);
15514 			reg2->r32 = reg1->r32;
15515 
15516 			t = tnum_intersect(tnum_subreg(reg1->var_off), tnum_subreg(reg2->var_off));
15517 			reg1->var_off = tnum_with_subreg(reg1->var_off, t);
15518 			reg2->var_off = tnum_with_subreg(reg2->var_off, t);
15519 		} else {
15520 			reg1->r64 = cnum64_intersect(reg1->r64, reg2->r64);
15521 			reg2->r64 = reg1->r64;
15522 
15523 			reg1->var_off = tnum_intersect(reg1->var_off, reg2->var_off);
15524 			reg2->var_off = reg1->var_off;
15525 		}
15526 		break;
15527 	case BPF_JNE:
15528 		if (!is_reg_const(reg2, is_jmp32))
15529 			swap(reg1, reg2);
15530 		if (!is_reg_const(reg2, is_jmp32))
15531 			break;
15532 
15533 		/* try to recompute the bound of reg1 if reg2 is a const and
15534 		 * is exactly the edge of reg1.
15535 		 */
15536 		val = reg_const_value(reg2, is_jmp32);
15537 		if (is_jmp32) {
15538 			/* Complement of the range [val, val] as cnum32. */
15539 			cnum32_intersect_with(&reg1->r32, (struct cnum32){ val + 1, U32_MAX - 1 });
15540 		} else {
15541 			/* Complement of the range [val, val] as cnum64. */
15542 			cnum64_intersect_with(&reg1->r64, (struct cnum64){ val + 1, U64_MAX - 1 });
15543 		}
15544 		break;
15545 	case BPF_JSET:
15546 		if (!is_reg_const(reg2, is_jmp32))
15547 			swap(reg1, reg2);
15548 		if (!is_reg_const(reg2, is_jmp32))
15549 			break;
15550 		val = reg_const_value(reg2, is_jmp32);
15551 		/* BPF_JSET (i.e., TRUE branch, *not* BPF_JSET | BPF_X)
15552 		 * requires single bit to learn something useful. E.g., if we
15553 		 * know that `r1 & 0x3` is true, then which bits (0, 1, or both)
15554 		 * are actually set? We can learn something definite only if
15555 		 * it's a single-bit value to begin with.
15556 		 *
15557 		 * BPF_JSET | BPF_X (i.e., negation of BPF_JSET) doesn't have
15558 		 * this restriction. I.e., !(r1 & 0x3) means neither bit 0 nor
15559 		 * bit 1 is set, which we can readily use in adjustments.
15560 		 */
15561 		if (!is_power_of_2(val))
15562 			break;
15563 		if (is_jmp32) {
15564 			t = tnum_or(tnum_subreg(reg1->var_off), tnum_const(val));
15565 			reg1->var_off = tnum_with_subreg(reg1->var_off, t);
15566 		} else {
15567 			reg1->var_off = tnum_or(reg1->var_off, tnum_const(val));
15568 		}
15569 		break;
15570 	case BPF_JSET | BPF_X: /* reverse of BPF_JSET, see rev_opcode() */
15571 		if (!is_reg_const(reg2, is_jmp32))
15572 			swap(reg1, reg2);
15573 		if (!is_reg_const(reg2, is_jmp32))
15574 			break;
15575 		val = reg_const_value(reg2, is_jmp32);
15576 		/* Forget the ranges before narrowing tnums, to avoid invariant
15577 		 * violations if we're on a dead branch.
15578 		 */
15579 		__mark_reg_unbounded(reg1);
15580 		if (is_jmp32) {
15581 			t = tnum_and(tnum_subreg(reg1->var_off), tnum_const(~val));
15582 			reg1->var_off = tnum_with_subreg(reg1->var_off, t);
15583 		} else {
15584 			reg1->var_off = tnum_and(reg1->var_off, tnum_const(~val));
15585 		}
15586 		break;
15587 	case BPF_JLE:
15588 		if (is_jmp32) {
15589 			cnum32_intersect_with_urange(&reg1->r32, 0, reg_u32_max(reg2));
15590 			cnum32_intersect_with_urange(&reg2->r32, reg_u32_min(reg1), U32_MAX);
15591 		} else {
15592 			cnum64_intersect_with_urange(&reg1->r64, 0, reg_umax(reg2));
15593 			cnum64_intersect_with_urange(&reg2->r64, reg_umin(reg1), U64_MAX);
15594 		}
15595 		break;
15596 	case BPF_JLT:
15597 		if (is_jmp32) {
15598 			cnum32_intersect_with_urange(&reg1->r32, 0, reg_u32_max(reg2) - 1);
15599 			cnum32_intersect_with_urange(&reg2->r32, reg_u32_min(reg1) + 1, U32_MAX);
15600 		} else {
15601 			cnum64_intersect_with_urange(&reg1->r64, 0, reg_umax(reg2) - 1);
15602 			cnum64_intersect_with_urange(&reg2->r64, reg_umin(reg1) + 1, U64_MAX);
15603 		}
15604 		break;
15605 	case BPF_JSLE:
15606 		if (is_jmp32) {
15607 			cnum32_intersect_with_srange(&reg1->r32, S32_MIN, reg_s32_max(reg2));
15608 			cnum32_intersect_with_srange(&reg2->r32, reg_s32_min(reg1), S32_MAX);
15609 		} else {
15610 			cnum64_intersect_with_srange(&reg1->r64, S64_MIN, reg_smax(reg2));
15611 			cnum64_intersect_with_srange(&reg2->r64, reg_smin(reg1), S64_MAX);
15612 		}
15613 		break;
15614 	case BPF_JSLT:
15615 		if (is_jmp32) {
15616 			cnum32_intersect_with_srange(&reg1->r32, S32_MIN, reg_s32_max(reg2) - 1);
15617 			cnum32_intersect_with_srange(&reg2->r32, reg_s32_min(reg1) + 1, S32_MAX);
15618 		} else {
15619 			cnum64_intersect_with_srange(&reg1->r64, S64_MIN, reg_smax(reg2) - 1);
15620 			cnum64_intersect_with_srange(&reg2->r64, reg_smin(reg1) + 1, S64_MAX);
15621 		}
15622 		break;
15623 	default:
15624 		return;
15625 	}
15626 }
15627 
15628 /* Check for invariant violations on the registers for both branches of a condition */
15629 static int regs_bounds_sanity_check_branches(struct bpf_verifier_env *env)
15630 {
15631 	int err;
15632 
15633 	err = reg_bounds_sanity_check(env, &env->true_reg1, "true_reg1");
15634 	err = err ?: reg_bounds_sanity_check(env, &env->true_reg2, "true_reg2");
15635 	err = err ?: reg_bounds_sanity_check(env, &env->false_reg1, "false_reg1");
15636 	err = err ?: reg_bounds_sanity_check(env, &env->false_reg2, "false_reg2");
15637 	return err;
15638 }
15639 
15640 static void mark_ptr_or_null_reg(struct bpf_func_state *state,
15641 				 struct bpf_reg_state *reg, u32 id,
15642 				 bool is_null)
15643 {
15644 	if (type_may_be_null(reg->type) && reg->id == id &&
15645 	    (is_rcu_reg(reg) || !WARN_ON_ONCE(!reg->id))) {
15646 		/* Old offset should have been known-zero, because we don't
15647 		 * allow pointer arithmetic on pointers that might be NULL.
15648 		 * If we see this happening, don't convert the register.
15649 		 *
15650 		 * But in some cases, some helpers that return local kptrs
15651 		 * advance offset for the returned pointer. In those cases,
15652 		 * it is fine to expect to see reg->var_off.
15653 		 */
15654 		if (!(type_is_ptr_alloc_obj(reg->type) || type_is_non_owning_ref(reg->type)) &&
15655 		    WARN_ON_ONCE(!tnum_equals_const(reg->var_off, 0)))
15656 			return;
15657 		if (is_null) {
15658 			/* We don't need id from this point
15659 			 * onwards anymore, thus we should better reset it,
15660 			 * so that state pruning has chances to take effect.
15661 			 */
15662 			__mark_reg_known_zero(reg);
15663 			reg->type = SCALAR_VALUE;
15664 
15665 			return;
15666 		}
15667 
15668 		mark_ptr_not_null_reg(reg);
15669 
15670 		/*
15671 		 * reg->id is preserved for object relationship tracking
15672 		 * and spin_lock lock state tracking
15673 		 */
15674 	}
15675 }
15676 
15677 /* The logic is similar to find_good_pkt_pointers(), both could eventually
15678  * be folded together at some point.
15679  */
15680 static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno,
15681 				  bool is_null)
15682 {
15683 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
15684 	struct bpf_reg_state *regs = state->regs, *reg;
15685 	u32 id = regs[regno].id;
15686 
15687 	if (is_null && find_reference_state(vstate, id))
15688 		/* regs[regno] is in the " == NULL" branch.
15689 		 * No one could have freed the reference state before
15690 		 * doing the NULL check.
15691 		 */
15692 		WARN_ON_ONCE(release_reference_nomark(vstate, id));
15693 
15694 	bpf_for_each_reg_in_vstate(vstate, state, reg, ({
15695 		mark_ptr_or_null_reg(state, reg, id, is_null);
15696 	}));
15697 }
15698 
15699 static bool try_match_pkt_pointers(const struct bpf_insn *insn,
15700 				   struct bpf_reg_state *dst_reg,
15701 				   struct bpf_reg_state *src_reg,
15702 				   struct bpf_verifier_state *this_branch,
15703 				   struct bpf_verifier_state *other_branch)
15704 {
15705 	if (BPF_SRC(insn->code) != BPF_X)
15706 		return false;
15707 
15708 	/* Pointers are always 64-bit. */
15709 	if (BPF_CLASS(insn->code) == BPF_JMP32)
15710 		return false;
15711 
15712 	switch (BPF_OP(insn->code)) {
15713 	case BPF_JGT:
15714 		if ((dst_reg->type == PTR_TO_PACKET &&
15715 		     src_reg->type == PTR_TO_PACKET_END) ||
15716 		    (dst_reg->type == PTR_TO_PACKET_META &&
15717 		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
15718 			/* pkt_data' > pkt_end, pkt_meta' > pkt_data */
15719 			find_good_pkt_pointers(this_branch, dst_reg,
15720 					       dst_reg->type, false);
15721 			mark_pkt_end(other_branch, insn->dst_reg, true);
15722 		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
15723 			    src_reg->type == PTR_TO_PACKET) ||
15724 			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
15725 			    src_reg->type == PTR_TO_PACKET_META)) {
15726 			/* pkt_end > pkt_data', pkt_data > pkt_meta' */
15727 			find_good_pkt_pointers(other_branch, src_reg,
15728 					       src_reg->type, true);
15729 			mark_pkt_end(this_branch, insn->src_reg, false);
15730 		} else {
15731 			return false;
15732 		}
15733 		break;
15734 	case BPF_JLT:
15735 		if ((dst_reg->type == PTR_TO_PACKET &&
15736 		     src_reg->type == PTR_TO_PACKET_END) ||
15737 		    (dst_reg->type == PTR_TO_PACKET_META &&
15738 		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
15739 			/* pkt_data' < pkt_end, pkt_meta' < pkt_data */
15740 			find_good_pkt_pointers(other_branch, dst_reg,
15741 					       dst_reg->type, true);
15742 			mark_pkt_end(this_branch, insn->dst_reg, false);
15743 		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
15744 			    src_reg->type == PTR_TO_PACKET) ||
15745 			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
15746 			    src_reg->type == PTR_TO_PACKET_META)) {
15747 			/* pkt_end < pkt_data', pkt_data > pkt_meta' */
15748 			find_good_pkt_pointers(this_branch, src_reg,
15749 					       src_reg->type, false);
15750 			mark_pkt_end(other_branch, insn->src_reg, true);
15751 		} else {
15752 			return false;
15753 		}
15754 		break;
15755 	case BPF_JGE:
15756 		if ((dst_reg->type == PTR_TO_PACKET &&
15757 		     src_reg->type == PTR_TO_PACKET_END) ||
15758 		    (dst_reg->type == PTR_TO_PACKET_META &&
15759 		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
15760 			/* pkt_data' >= pkt_end, pkt_meta' >= pkt_data */
15761 			find_good_pkt_pointers(this_branch, dst_reg,
15762 					       dst_reg->type, true);
15763 			mark_pkt_end(other_branch, insn->dst_reg, false);
15764 		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
15765 			    src_reg->type == PTR_TO_PACKET) ||
15766 			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
15767 			    src_reg->type == PTR_TO_PACKET_META)) {
15768 			/* pkt_end >= pkt_data', pkt_data >= pkt_meta' */
15769 			find_good_pkt_pointers(other_branch, src_reg,
15770 					       src_reg->type, false);
15771 			mark_pkt_end(this_branch, insn->src_reg, true);
15772 		} else {
15773 			return false;
15774 		}
15775 		break;
15776 	case BPF_JLE:
15777 		if ((dst_reg->type == PTR_TO_PACKET &&
15778 		     src_reg->type == PTR_TO_PACKET_END) ||
15779 		    (dst_reg->type == PTR_TO_PACKET_META &&
15780 		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
15781 			/* pkt_data' <= pkt_end, pkt_meta' <= pkt_data */
15782 			find_good_pkt_pointers(other_branch, dst_reg,
15783 					       dst_reg->type, false);
15784 			mark_pkt_end(this_branch, insn->dst_reg, true);
15785 		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
15786 			    src_reg->type == PTR_TO_PACKET) ||
15787 			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
15788 			    src_reg->type == PTR_TO_PACKET_META)) {
15789 			/* pkt_end <= pkt_data', pkt_data <= pkt_meta' */
15790 			find_good_pkt_pointers(this_branch, src_reg,
15791 					       src_reg->type, true);
15792 			mark_pkt_end(other_branch, insn->src_reg, false);
15793 		} else {
15794 			return false;
15795 		}
15796 		break;
15797 	default:
15798 		return false;
15799 	}
15800 
15801 	return true;
15802 }
15803 
15804 static void __collect_linked_regs(struct linked_regs *reg_set, struct bpf_reg_state *reg,
15805 				  u32 id, u32 frameno, u32 spi_or_reg, bool is_reg)
15806 {
15807 	struct linked_reg *e;
15808 
15809 	if (reg->type != SCALAR_VALUE || (reg->id & ~BPF_ADD_CONST) != id)
15810 		return;
15811 
15812 	e = linked_regs_push(reg_set);
15813 	if (e) {
15814 		e->frameno = frameno;
15815 		e->is_reg = is_reg;
15816 		e->regno = spi_or_reg;
15817 	} else {
15818 		clear_scalar_id(reg);
15819 	}
15820 }
15821 
15822 /* For all R being scalar registers or spilled scalar registers
15823  * in verifier state, save R in linked_regs if R->id == id.
15824  * If there are too many Rs sharing same id, reset id for leftover Rs.
15825  */
15826 static void collect_linked_regs(struct bpf_verifier_env *env,
15827 				struct bpf_verifier_state *vstate,
15828 				u32 id,
15829 				struct linked_regs *linked_regs)
15830 {
15831 	struct bpf_insn_aux_data *aux = env->insn_aux_data;
15832 	struct bpf_func_state *func;
15833 	struct bpf_reg_state *reg;
15834 	u16 live_regs;
15835 	int i, j;
15836 
15837 	id = id & ~BPF_ADD_CONST;
15838 	for (i = vstate->curframe; i >= 0; i--) {
15839 		live_regs = aux[bpf_frame_insn_idx(vstate, i)].live_regs_before;
15840 		func = vstate->frame[i];
15841 		for (j = 0; j < BPF_REG_FP; j++) {
15842 			if (!(live_regs & BIT(j)))
15843 				continue;
15844 			reg = &func->regs[j];
15845 			__collect_linked_regs(linked_regs, reg, id, i, j, true);
15846 		}
15847 		for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
15848 			if (!bpf_is_spilled_reg(&func->stack[j]))
15849 				continue;
15850 			reg = &func->stack[j].spilled_ptr;
15851 			__collect_linked_regs(linked_regs, reg, id, i, j, false);
15852 		}
15853 	}
15854 }
15855 
15856 /* For all R in linked_regs, copy known_reg range into R
15857  * if R->id == known_reg->id.
15858  */
15859 static void sync_linked_regs(struct bpf_verifier_env *env, struct bpf_verifier_state *vstate,
15860 			     struct bpf_reg_state *known_reg, struct linked_regs *linked_regs)
15861 {
15862 	struct bpf_reg_state fake_reg;
15863 	struct bpf_reg_state *reg;
15864 	struct linked_reg *e;
15865 	int i;
15866 
15867 	for (i = 0; i < linked_regs->cnt; ++i) {
15868 		e = &linked_regs->entries[i];
15869 		reg = e->is_reg ? &vstate->frame[e->frameno]->regs[e->regno]
15870 				: &vstate->frame[e->frameno]->stack[e->spi].spilled_ptr;
15871 		if (reg->type != SCALAR_VALUE || reg == known_reg)
15872 			continue;
15873 		if ((reg->id & ~BPF_ADD_CONST) != (known_reg->id & ~BPF_ADD_CONST))
15874 			continue;
15875 		/*
15876 		 * Skip mixed 32/64-bit links: the delta relationship doesn't
15877 		 * hold across different ALU widths.
15878 		 */
15879 		if (((reg->id ^ known_reg->id) & BPF_ADD_CONST) == BPF_ADD_CONST)
15880 			continue;
15881 		if ((!(reg->id & BPF_ADD_CONST) && !(known_reg->id & BPF_ADD_CONST)) ||
15882 		    reg->delta == known_reg->delta) {
15883 			s32 saved_subreg_def = reg->subreg_def;
15884 
15885 			*reg = *known_reg;
15886 			reg->subreg_def = saved_subreg_def;
15887 		} else {
15888 			s32 saved_subreg_def = reg->subreg_def;
15889 			s32 saved_off = reg->delta;
15890 			u32 saved_id = reg->id;
15891 
15892 			fake_reg.type = SCALAR_VALUE;
15893 			__mark_reg_known(&fake_reg, (s64)reg->delta - (s64)known_reg->delta);
15894 
15895 			/* reg = known_reg; reg += delta */
15896 			*reg = *known_reg;
15897 			/*
15898 			 * Must preserve off, id and subreg_def flag,
15899 			 * otherwise another sync_linked_regs() will be incorrect.
15900 			 */
15901 			reg->delta = saved_off;
15902 			reg->id = saved_id;
15903 			reg->subreg_def = saved_subreg_def;
15904 
15905 			scalar32_min_max_add(reg, &fake_reg);
15906 			scalar_min_max_add(reg, &fake_reg);
15907 			reg->var_off = tnum_add(reg->var_off, fake_reg.var_off);
15908 			if ((reg->id | known_reg->id) & BPF_ADD_CONST32)
15909 				zext_32_to_64(reg);
15910 			reg_bounds_sync(reg);
15911 		}
15912 		if (e->is_reg)
15913 			mark_reg_scratched(env, e->regno);
15914 		else
15915 			mark_stack_slot_scratched(env, e->spi);
15916 	}
15917 }
15918 
15919 static int check_cond_jmp_op(struct bpf_verifier_env *env,
15920 			     struct bpf_insn *insn, int *insn_idx)
15921 {
15922 	struct bpf_verifier_state *this_branch = env->cur_state;
15923 	struct bpf_verifier_state *other_branch;
15924 	struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs;
15925 	struct bpf_reg_state *dst_reg, *other_branch_regs, *src_reg = NULL;
15926 	struct bpf_reg_state *eq_branch_regs;
15927 	struct linked_regs linked_regs = {};
15928 	u8 opcode = BPF_OP(insn->code);
15929 	int insn_flags = 0;
15930 	bool is_jmp32;
15931 	int pred = -1;
15932 	int err;
15933 
15934 	/* Only conditional jumps are expected to reach here. */
15935 	if (opcode == BPF_JA || opcode > BPF_JCOND) {
15936 		verbose(env, "invalid BPF_JMP/JMP32 opcode %x\n", opcode);
15937 		return -EINVAL;
15938 	}
15939 
15940 	if (opcode == BPF_JCOND) {
15941 		struct bpf_verifier_state *cur_st = env->cur_state, *queued_st, *prev_st;
15942 		int idx = *insn_idx;
15943 
15944 		prev_st = find_prev_entry(env, cur_st->parent, idx);
15945 
15946 		/* branch out 'fallthrough' insn as a new state to explore */
15947 		queued_st = push_stack(env, idx + 1, idx, false);
15948 		if (IS_ERR(queued_st))
15949 			return PTR_ERR(queued_st);
15950 
15951 		queued_st->may_goto_depth++;
15952 		if (prev_st)
15953 			widen_imprecise_scalars(env, prev_st, queued_st);
15954 		*insn_idx += insn->off;
15955 		return 0;
15956 	}
15957 
15958 	/* check src2 operand */
15959 	err = check_reg_arg(env, insn->dst_reg, SRC_OP);
15960 	if (err)
15961 		return err;
15962 
15963 	dst_reg = &regs[insn->dst_reg];
15964 	if (BPF_SRC(insn->code) == BPF_X) {
15965 		/* check src1 operand */
15966 		err = check_reg_arg(env, insn->src_reg, SRC_OP);
15967 		if (err)
15968 			return err;
15969 
15970 		src_reg = &regs[insn->src_reg];
15971 		if (!(reg_is_pkt_pointer_any(dst_reg) && reg_is_pkt_pointer_any(src_reg)) &&
15972 		    is_pointer_value(env, insn->src_reg)) {
15973 			verbose(env, "R%d pointer comparison prohibited\n",
15974 				insn->src_reg);
15975 			return -EACCES;
15976 		}
15977 
15978 		if (src_reg->type == PTR_TO_STACK)
15979 			insn_flags |= INSN_F_SRC_REG_STACK;
15980 		if (dst_reg->type == PTR_TO_STACK)
15981 			insn_flags |= INSN_F_DST_REG_STACK;
15982 	} else {
15983 		src_reg = &env->fake_reg[0];
15984 		memset(src_reg, 0, sizeof(*src_reg));
15985 		src_reg->type = SCALAR_VALUE;
15986 		__mark_reg_known(src_reg, insn->imm);
15987 
15988 		if (dst_reg->type == PTR_TO_STACK)
15989 			insn_flags |= INSN_F_DST_REG_STACK;
15990 	}
15991 
15992 	if (insn_flags) {
15993 		err = bpf_push_jmp_history(env, this_branch, insn_flags, 0, 0, 0);
15994 		if (err)
15995 			return err;
15996 	}
15997 
15998 	is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
15999 	env->false_reg1 = *dst_reg;
16000 	env->false_reg2 = *src_reg;
16001 	env->true_reg1 = *dst_reg;
16002 	env->true_reg2 = *src_reg;
16003 	pred = is_branch_taken(env, dst_reg, src_reg, opcode, is_jmp32);
16004 	if (pred >= 0) {
16005 		/* If we get here with a dst_reg pointer type it is because
16006 		 * above is_branch_taken() special cased the 0 comparison.
16007 		 */
16008 		if (!__is_pointer_value(false, dst_reg))
16009 			err = mark_chain_precision(env, insn->dst_reg);
16010 		if (BPF_SRC(insn->code) == BPF_X && !err &&
16011 		    !__is_pointer_value(false, src_reg))
16012 			err = mark_chain_precision(env, insn->src_reg);
16013 		if (err)
16014 			return err;
16015 	}
16016 
16017 	if (pred == 1) {
16018 		/* Only follow the goto, ignore fall-through. If needed, push
16019 		 * the fall-through branch for simulation under speculative
16020 		 * execution.
16021 		 */
16022 		if (!env->bypass_spec_v1) {
16023 			err = sanitize_speculative_path(env, insn, *insn_idx + 1, *insn_idx);
16024 			if (err < 0)
16025 				return err;
16026 		}
16027 		if (env->log.level & BPF_LOG_LEVEL)
16028 			print_insn_state(env, this_branch, this_branch->curframe);
16029 		*insn_idx += insn->off;
16030 		return 0;
16031 	} else if (pred == 0) {
16032 		/* Only follow the fall-through branch, since that's where the
16033 		 * program will go. If needed, push the goto branch for
16034 		 * simulation under speculative execution.
16035 		 */
16036 		if (!env->bypass_spec_v1) {
16037 			err = sanitize_speculative_path(env, insn, *insn_idx + insn->off + 1,
16038 							*insn_idx);
16039 			if (err < 0)
16040 				return err;
16041 		}
16042 		if (env->log.level & BPF_LOG_LEVEL)
16043 			print_insn_state(env, this_branch, this_branch->curframe);
16044 		return 0;
16045 	}
16046 
16047 	/* Push scalar registers sharing same ID to jump history,
16048 	 * do this before creating 'other_branch', so that both
16049 	 * 'this_branch' and 'other_branch' share this history
16050 	 * if parent state is created.
16051 	 */
16052 	if (BPF_SRC(insn->code) == BPF_X && src_reg->type == SCALAR_VALUE && src_reg->id)
16053 		collect_linked_regs(env, this_branch, src_reg->id, &linked_regs);
16054 	if (dst_reg->type == SCALAR_VALUE && dst_reg->id)
16055 		collect_linked_regs(env, this_branch, dst_reg->id, &linked_regs);
16056 	if (linked_regs.cnt > 1) {
16057 		err = bpf_push_jmp_history(env, this_branch, 0, 0, 0, linked_regs_pack(&linked_regs));
16058 		if (err)
16059 			return err;
16060 	}
16061 
16062 	other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx, false);
16063 	if (IS_ERR(other_branch))
16064 		return PTR_ERR(other_branch);
16065 	other_branch_regs = other_branch->frame[other_branch->curframe]->regs;
16066 
16067 	err = regs_bounds_sanity_check_branches(env);
16068 	if (err)
16069 		return err;
16070 
16071 	*dst_reg = env->false_reg1;
16072 	*src_reg = env->false_reg2;
16073 	other_branch_regs[insn->dst_reg] = env->true_reg1;
16074 	if (BPF_SRC(insn->code) == BPF_X)
16075 		other_branch_regs[insn->src_reg] = env->true_reg2;
16076 
16077 	if (BPF_SRC(insn->code) == BPF_X &&
16078 	    src_reg->type == SCALAR_VALUE && src_reg->id &&
16079 	    !WARN_ON_ONCE(src_reg->id != other_branch_regs[insn->src_reg].id)) {
16080 		sync_linked_regs(env, this_branch, src_reg, &linked_regs);
16081 		sync_linked_regs(env, other_branch, &other_branch_regs[insn->src_reg],
16082 				 &linked_regs);
16083 	}
16084 	if (dst_reg->type == SCALAR_VALUE && dst_reg->id &&
16085 	    !WARN_ON_ONCE(dst_reg->id != other_branch_regs[insn->dst_reg].id)) {
16086 		sync_linked_regs(env, this_branch, dst_reg, &linked_regs);
16087 		sync_linked_regs(env, other_branch, &other_branch_regs[insn->dst_reg],
16088 				 &linked_regs);
16089 	}
16090 
16091 	/* if one pointer register is compared to another pointer
16092 	 * register check if PTR_MAYBE_NULL could be lifted.
16093 	 * E.g. register A - maybe null
16094 	 *      register B - not null
16095 	 * for JNE A, B, ... - A is not null in the false branch;
16096 	 * for JEQ A, B, ... - A is not null in the true branch.
16097 	 *
16098 	 * Since PTR_TO_BTF_ID points to a kernel struct that does
16099 	 * not need to be null checked by the BPF program, i.e.,
16100 	 * could be null even without PTR_MAYBE_NULL marking, so
16101 	 * only propagate nullness when neither reg is that type.
16102 	 */
16103 	if (!is_jmp32 && BPF_SRC(insn->code) == BPF_X &&
16104 	    __is_pointer_value(false, src_reg) && __is_pointer_value(false, dst_reg) &&
16105 	    type_may_be_null(src_reg->type) != type_may_be_null(dst_reg->type) &&
16106 	    base_type(src_reg->type) != PTR_TO_BTF_ID &&
16107 	    base_type(dst_reg->type) != PTR_TO_BTF_ID) {
16108 		eq_branch_regs = NULL;
16109 		switch (opcode) {
16110 		case BPF_JEQ:
16111 			eq_branch_regs = other_branch_regs;
16112 			break;
16113 		case BPF_JNE:
16114 			eq_branch_regs = regs;
16115 			break;
16116 		default:
16117 			/* do nothing */
16118 			break;
16119 		}
16120 		if (eq_branch_regs) {
16121 			if (type_may_be_null(src_reg->type))
16122 				mark_ptr_not_null_reg(&eq_branch_regs[insn->src_reg]);
16123 			else
16124 				mark_ptr_not_null_reg(&eq_branch_regs[insn->dst_reg]);
16125 		}
16126 	}
16127 
16128 	/* detect if R == 0 where R is returned from bpf_map_lookup_elem().
16129 	 * Also does the same detection for a register whose the value is
16130 	 * known to be 0.
16131 	 * NOTE: these optimizations below are related with pointer comparison
16132 	 *       which will never be JMP32.
16133 	 */
16134 	if (!is_jmp32 && (opcode == BPF_JEQ || opcode == BPF_JNE) &&
16135 	    type_may_be_null(dst_reg->type) &&
16136 	    ((BPF_SRC(insn->code) == BPF_K && insn->imm == 0) ||
16137 	     (BPF_SRC(insn->code) == BPF_X && bpf_register_is_null(src_reg)))) {
16138 		/* Mark all identical registers in each branch as either
16139 		 * safe or unknown depending R == 0 or R != 0 conditional.
16140 		 */
16141 		mark_ptr_or_null_regs(this_branch, insn->dst_reg,
16142 				      opcode == BPF_JNE);
16143 		mark_ptr_or_null_regs(other_branch, insn->dst_reg,
16144 				      opcode == BPF_JEQ);
16145 	} else if (!try_match_pkt_pointers(insn, dst_reg, &regs[insn->src_reg],
16146 					   this_branch, other_branch) &&
16147 		   is_pointer_value(env, insn->dst_reg)) {
16148 		verbose(env, "R%d pointer comparison prohibited\n",
16149 			insn->dst_reg);
16150 		return -EACCES;
16151 	}
16152 	if (env->log.level & BPF_LOG_LEVEL)
16153 		print_insn_state(env, this_branch, this_branch->curframe);
16154 	return 0;
16155 }
16156 
16157 /* verify BPF_LD_IMM64 instruction */
16158 static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
16159 {
16160 	struct bpf_insn_aux_data *aux = cur_aux(env);
16161 	struct bpf_reg_state *regs = cur_regs(env);
16162 	struct bpf_reg_state *dst_reg;
16163 	struct bpf_map *map;
16164 	int err;
16165 
16166 	if (BPF_SIZE(insn->code) != BPF_DW) {
16167 		verbose(env, "invalid BPF_LD_IMM insn\n");
16168 		return -EINVAL;
16169 	}
16170 
16171 	err = check_reg_arg(env, insn->dst_reg, DST_OP);
16172 	if (err)
16173 		return err;
16174 
16175 	dst_reg = &regs[insn->dst_reg];
16176 	if (insn->src_reg == 0) {
16177 		u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
16178 
16179 		dst_reg->type = SCALAR_VALUE;
16180 		__mark_reg_known(&regs[insn->dst_reg], imm);
16181 		return 0;
16182 	}
16183 
16184 	/* All special src_reg cases are listed below. From this point onwards
16185 	 * we either succeed and assign a corresponding dst_reg->type after
16186 	 * zeroing the offset, or fail and reject the program.
16187 	 */
16188 	mark_reg_known_zero(env, regs, insn->dst_reg);
16189 
16190 	if (insn->src_reg == BPF_PSEUDO_BTF_ID) {
16191 		dst_reg->type = aux->btf_var.reg_type;
16192 		switch (base_type(dst_reg->type)) {
16193 		case PTR_TO_MEM:
16194 			dst_reg->mem_size = aux->btf_var.mem_size;
16195 			break;
16196 		case PTR_TO_BTF_ID:
16197 			dst_reg->btf = aux->btf_var.btf;
16198 			dst_reg->btf_id = aux->btf_var.btf_id;
16199 			break;
16200 		default:
16201 			verifier_bug(env, "pseudo btf id: unexpected dst reg type");
16202 			return -EFAULT;
16203 		}
16204 		return 0;
16205 	}
16206 
16207 	if (insn->src_reg == BPF_PSEUDO_FUNC) {
16208 		struct bpf_prog_aux *aux = env->prog->aux;
16209 		u32 subprogno = bpf_find_subprog(env,
16210 						 env->insn_idx + insn->imm + 1);
16211 
16212 		if (!aux->func_info) {
16213 			verbose(env, "missing btf func_info\n");
16214 			return -EINVAL;
16215 		}
16216 		if (aux->func_info_aux[subprogno].linkage != BTF_FUNC_STATIC) {
16217 			verbose(env, "callback function not static\n");
16218 			return -EINVAL;
16219 		}
16220 
16221 		dst_reg->type = PTR_TO_FUNC;
16222 		dst_reg->subprogno = subprogno;
16223 		return 0;
16224 	}
16225 
16226 	map = env->used_maps[aux->map_index];
16227 
16228 	if (insn->src_reg == BPF_PSEUDO_MAP_VALUE ||
16229 	    insn->src_reg == BPF_PSEUDO_MAP_IDX_VALUE) {
16230 		if (map->map_type == BPF_MAP_TYPE_ARENA) {
16231 			__mark_reg_unknown(env, dst_reg);
16232 			dst_reg->map_ptr = map;
16233 			return 0;
16234 		}
16235 		__mark_reg_known(dst_reg, aux->map_off);
16236 		dst_reg->type = PTR_TO_MAP_VALUE;
16237 		dst_reg->map_ptr = map;
16238 		WARN_ON_ONCE(map->map_type != BPF_MAP_TYPE_INSN_ARRAY &&
16239 			     map->max_entries != 1);
16240 		/* We want reg->id to be same (0) as map_value is not distinct */
16241 	} else if (insn->src_reg == BPF_PSEUDO_MAP_FD ||
16242 		   insn->src_reg == BPF_PSEUDO_MAP_IDX) {
16243 		dst_reg->type = CONST_PTR_TO_MAP;
16244 		dst_reg->map_ptr = map;
16245 	} else {
16246 		verifier_bug(env, "unexpected src reg value for ldimm64");
16247 		return -EFAULT;
16248 	}
16249 
16250 	return 0;
16251 }
16252 
16253 static bool may_access_skb(enum bpf_prog_type type)
16254 {
16255 	switch (type) {
16256 	case BPF_PROG_TYPE_SOCKET_FILTER:
16257 	case BPF_PROG_TYPE_SCHED_CLS:
16258 	case BPF_PROG_TYPE_SCHED_ACT:
16259 		return true;
16260 	default:
16261 		return false;
16262 	}
16263 }
16264 
16265 /* verify safety of LD_ABS|LD_IND instructions:
16266  * - they can only appear in the programs where ctx == skb
16267  * - since they are wrappers of function calls, they scratch R1-R5 registers,
16268  *   preserve R6-R9, and store return value into R0
16269  *
16270  * Implicit input:
16271  *   ctx == skb == R6 == CTX
16272  *
16273  * Explicit input:
16274  *   SRC == any register
16275  *   IMM == 32-bit immediate
16276  *
16277  * Output:
16278  *   R0 - 8/16/32-bit skb data converted to cpu endianness
16279  */
16280 static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
16281 {
16282 	struct bpf_reg_state *regs = cur_regs(env);
16283 	static const int ctx_reg = BPF_REG_6;
16284 	u8 mode = BPF_MODE(insn->code);
16285 	int i, err;
16286 
16287 	if (!may_access_skb(resolve_prog_type(env->prog))) {
16288 		verbose(env, "BPF_LD_[ABS|IND] instructions not allowed for this program type\n");
16289 		return -EINVAL;
16290 	}
16291 
16292 	if (!env->ops->gen_ld_abs) {
16293 		verifier_bug(env, "gen_ld_abs is null");
16294 		return -EFAULT;
16295 	}
16296 
16297 	/* check whether implicit source operand (register R6) is readable */
16298 	err = check_reg_arg(env, ctx_reg, SRC_OP);
16299 	if (err)
16300 		return err;
16301 
16302 	/* Disallow usage of BPF_LD_[ABS|IND] with reference tracking, as
16303 	 * gen_ld_abs() may terminate the program at runtime, leading to
16304 	 * reference leak.
16305 	 */
16306 	err = check_resource_leak(env, false, true, "BPF_LD_[ABS|IND]");
16307 	if (err)
16308 		return err;
16309 
16310 	if (regs[ctx_reg].type != PTR_TO_CTX) {
16311 		verbose(env,
16312 			"at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
16313 		return -EINVAL;
16314 	}
16315 
16316 	if (mode == BPF_IND) {
16317 		/* check explicit source operand */
16318 		err = check_reg_arg(env, insn->src_reg, SRC_OP);
16319 		if (err)
16320 			return err;
16321 	}
16322 
16323 	err = check_ptr_off_reg(env, &regs[ctx_reg], ctx_reg);
16324 	if (err < 0)
16325 		return err;
16326 
16327 	/* reset caller saved regs to unreadable */
16328 	for (i = 0; i < CALLER_SAVED_REGS; i++) {
16329 		bpf_mark_reg_not_init(env, &regs[caller_saved[i]]);
16330 		check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
16331 	}
16332 
16333 	/* mark destination R0 register as readable, since it contains
16334 	 * the value fetched from the packet.
16335 	 * Already marked as written above.
16336 	 */
16337 	mark_reg_unknown(env, regs, BPF_REG_0);
16338 	/* ld_abs load up to 32-bit skb data. */
16339 	regs[BPF_REG_0].subreg_def = env->insn_idx + 1;
16340 	/*
16341 	 * See bpf_gen_ld_abs() which emits a hidden BPF_EXIT with r0=0
16342 	 * which must be explored by the verifier when in a subprog.
16343 	 */
16344 	if (env->cur_state->curframe) {
16345 		struct bpf_verifier_state *branch;
16346 
16347 		mark_reg_scratched(env, BPF_REG_0);
16348 		branch = push_stack(env, env->insn_idx + 1, env->insn_idx, false);
16349 		if (IS_ERR(branch))
16350 			return PTR_ERR(branch);
16351 		mark_reg_known_zero(env, regs, BPF_REG_0);
16352 		err = prepare_func_exit(env, &env->insn_idx);
16353 		if (err)
16354 			return err;
16355 		env->insn_idx--;
16356 	}
16357 	return 0;
16358 }
16359 
16360 
16361 static bool return_retval_range(struct bpf_verifier_env *env, struct bpf_retval_range *range)
16362 {
16363 	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
16364 
16365 	/* Default return value range. */
16366 	*range = retval_range(0, 1);
16367 
16368 	switch (prog_type) {
16369 	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
16370 		switch (env->prog->expected_attach_type) {
16371 		case BPF_CGROUP_UDP4_RECVMSG:
16372 		case BPF_CGROUP_UDP6_RECVMSG:
16373 		case BPF_CGROUP_UNIX_RECVMSG:
16374 		case BPF_CGROUP_INET4_GETPEERNAME:
16375 		case BPF_CGROUP_INET6_GETPEERNAME:
16376 		case BPF_CGROUP_UNIX_GETPEERNAME:
16377 		case BPF_CGROUP_INET4_GETSOCKNAME:
16378 		case BPF_CGROUP_INET6_GETSOCKNAME:
16379 		case BPF_CGROUP_UNIX_GETSOCKNAME:
16380 			*range = retval_range(1, 1);
16381 			break;
16382 		case BPF_CGROUP_INET4_BIND:
16383 		case BPF_CGROUP_INET6_BIND:
16384 			*range = retval_range(0, 3);
16385 			break;
16386 		default:
16387 			break;
16388 		}
16389 		break;
16390 	case BPF_PROG_TYPE_CGROUP_SKB:
16391 		if (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS)
16392 			*range = retval_range(0, 3);
16393 		break;
16394 	case BPF_PROG_TYPE_CGROUP_SOCK:
16395 	case BPF_PROG_TYPE_SOCK_OPS:
16396 	case BPF_PROG_TYPE_CGROUP_DEVICE:
16397 	case BPF_PROG_TYPE_CGROUP_SYSCTL:
16398 	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
16399 		break;
16400 	case BPF_PROG_TYPE_RAW_TRACEPOINT:
16401 		if (!env->prog->aux->attach_btf_id)
16402 			return false;
16403 		*range = retval_range(0, 0);
16404 		break;
16405 	case BPF_PROG_TYPE_TRACING:
16406 		switch (env->prog->expected_attach_type) {
16407 		case BPF_TRACE_FENTRY:
16408 		case BPF_TRACE_FEXIT:
16409 		case BPF_TRACE_FSESSION:
16410 		case BPF_TRACE_FENTRY_MULTI:
16411 		case BPF_TRACE_FEXIT_MULTI:
16412 		case BPF_TRACE_FSESSION_MULTI:
16413 			*range = retval_range(0, 0);
16414 			break;
16415 		case BPF_TRACE_RAW_TP:
16416 		case BPF_MODIFY_RETURN:
16417 			return false;
16418 		case BPF_TRACE_ITER:
16419 		default:
16420 			break;
16421 		}
16422 		break;
16423 	case BPF_PROG_TYPE_KPROBE:
16424 		switch (env->prog->expected_attach_type) {
16425 		case BPF_TRACE_KPROBE_SESSION:
16426 		case BPF_TRACE_UPROBE_SESSION:
16427 			break;
16428 		default:
16429 			return false;
16430 		}
16431 		break;
16432 	case BPF_PROG_TYPE_SK_LOOKUP:
16433 		*range = retval_range(SK_DROP, SK_PASS);
16434 		break;
16435 
16436 	case BPF_PROG_TYPE_LSM:
16437 		if (env->prog->expected_attach_type != BPF_LSM_CGROUP) {
16438 			/* no range found, any return value is allowed */
16439 			if (!get_func_retval_range(env->prog, range))
16440 				return false;
16441 			/* no restricted range, any return value is allowed */
16442 			if (range->minval == S32_MIN && range->maxval == S32_MAX)
16443 				return false;
16444 			range->return_32bit = true;
16445 		} else if (!env->prog->aux->attach_func_proto->type) {
16446 			/* Make sure programs that attach to void
16447 			 * hooks don't try to modify return value.
16448 			 */
16449 			*range = retval_range(1, 1);
16450 		}
16451 		break;
16452 
16453 	case BPF_PROG_TYPE_NETFILTER:
16454 		*range = retval_range(NF_DROP, NF_ACCEPT);
16455 		break;
16456 	case BPF_PROG_TYPE_STRUCT_OPS:
16457 		*range = retval_range(0, 0);
16458 		break;
16459 	case BPF_PROG_TYPE_EXT:
16460 		/* freplace program can return anything as its return value
16461 		 * depends on the to-be-replaced kernel func or bpf program.
16462 		 */
16463 	default:
16464 		return false;
16465 	}
16466 
16467 	/* Continue calculating. */
16468 
16469 	return true;
16470 }
16471 
16472 static bool program_returns_void(struct bpf_verifier_env *env)
16473 {
16474 	const struct bpf_prog *prog = env->prog;
16475 	enum bpf_prog_type prog_type = prog->type;
16476 
16477 	switch (prog_type) {
16478 	case BPF_PROG_TYPE_LSM:
16479 		/* See return_retval_range, for BPF_LSM_CGROUP can be 0 or 0-1 depending on hook. */
16480 		if (prog->expected_attach_type != BPF_LSM_CGROUP &&
16481 		    !prog->aux->attach_func_proto->type)
16482 			return true;
16483 		break;
16484 	case BPF_PROG_TYPE_STRUCT_OPS:
16485 		if (!prog->aux->attach_func_proto->type)
16486 			return true;
16487 		break;
16488 	case BPF_PROG_TYPE_EXT:
16489 		/*
16490 		 * If the actual program is an extension, let it
16491 		 * return void - attaching will succeed only if the
16492 		 * program being replaced also returns void, and since
16493 		 * it has passed verification its actual type doesn't matter.
16494 		 */
16495 		if (subprog_returns_void(env, 0))
16496 			return true;
16497 		break;
16498 	default:
16499 		break;
16500 	}
16501 	return false;
16502 }
16503 
16504 static int check_return_code(struct bpf_verifier_env *env, int regno, const char *reg_name)
16505 {
16506 	const char *exit_ctx = "At program exit";
16507 	struct tnum enforce_attach_type_range = tnum_unknown;
16508 	const struct bpf_prog *prog = env->prog;
16509 	struct bpf_reg_state *reg = reg_state(env, regno);
16510 	struct bpf_retval_range range = retval_range(0, 1);
16511 	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
16512 	struct bpf_func_state *frame = env->cur_state->frame[0];
16513 	const struct btf_type *reg_type, *ret_type = NULL;
16514 	int err;
16515 
16516 	/* LSM and struct_ops func-ptr's return type could be "void" */
16517 	if (!frame->in_async_callback_fn && program_returns_void(env))
16518 		return 0;
16519 
16520 	if (prog_type == BPF_PROG_TYPE_STRUCT_OPS) {
16521 		/* Allow a struct_ops program to return a referenced kptr if it
16522 		 * matches the operator's return type and is in its unmodified
16523 		 * form. A scalar zero (i.e., a null pointer) is also allowed.
16524 		 */
16525 		reg_type = reg->btf ? btf_type_by_id(reg->btf, reg->btf_id) : NULL;
16526 		ret_type = btf_type_resolve_ptr(prog->aux->attach_btf,
16527 						prog->aux->attach_func_proto->type,
16528 						NULL);
16529 		if (ret_type && ret_type == reg_type && reg_is_referenced(env, reg))
16530 			return __check_ptr_off_reg(env, reg, argno_from_reg(regno), false);
16531 	}
16532 
16533 	/* eBPF calling convention is such that R0 is used
16534 	 * to return the value from eBPF program.
16535 	 * Make sure that it's readable at this time
16536 	 * of bpf_exit, which means that program wrote
16537 	 * something into it earlier
16538 	 */
16539 	err = check_reg_arg(env, regno, SRC_OP);
16540 	if (err)
16541 		return err;
16542 
16543 	if (is_pointer_value(env, regno)) {
16544 		verbose(env, "R%d leaks addr as return value\n", regno);
16545 		return -EACCES;
16546 	}
16547 
16548 	if (frame->in_async_callback_fn) {
16549 		exit_ctx = "At async callback return";
16550 		range = frame->callback_ret_range;
16551 		goto enforce_retval;
16552 	}
16553 
16554 	if (prog_type == BPF_PROG_TYPE_STRUCT_OPS && !ret_type)
16555 		return 0;
16556 
16557 	if (prog_type == BPF_PROG_TYPE_CGROUP_SKB && (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS))
16558 		enforce_attach_type_range = tnum_range(2, 3);
16559 
16560 	if (!return_retval_range(env, &range))
16561 		return 0;
16562 
16563 enforce_retval:
16564 	if (reg->type != SCALAR_VALUE) {
16565 		verbose(env, "%s the register R%d is not a known value (%s)\n",
16566 			exit_ctx, regno, reg_type_str(env, reg->type));
16567 		return -EINVAL;
16568 	}
16569 
16570 	err = mark_chain_precision(env, regno);
16571 	if (err)
16572 		return err;
16573 
16574 	if (!retval_range_within(range, reg)) {
16575 		verbose_invalid_scalar(env, reg, range, exit_ctx, reg_name);
16576 		if (prog->expected_attach_type == BPF_LSM_CGROUP &&
16577 		    prog_type == BPF_PROG_TYPE_LSM &&
16578 		    !prog->aux->attach_func_proto->type)
16579 			verbose(env, "Note, BPF_LSM_CGROUP that attach to void LSM hooks can't modify return value!\n");
16580 		return -EINVAL;
16581 	}
16582 
16583 	if (!tnum_is_unknown(enforce_attach_type_range) &&
16584 	    tnum_in(enforce_attach_type_range, reg->var_off))
16585 		env->prog->enforce_expected_attach_type = 1;
16586 	return 0;
16587 }
16588 
16589 static int check_global_subprog_return_code(struct bpf_verifier_env *env)
16590 {
16591 	struct bpf_reg_state *reg = reg_state(env, BPF_REG_0);
16592 	struct bpf_func_state *cur_frame = cur_func(env);
16593 	int err;
16594 
16595 	if (subprog_returns_void(env, cur_frame->subprogno))
16596 		return 0;
16597 
16598 	err = check_reg_arg(env, BPF_REG_0, SRC_OP);
16599 	if (err)
16600 		return err;
16601 
16602 	/* Pointers to arena are safe to pass between subprograms. */
16603 	if (is_arena_reg(env, BPF_REG_0))
16604 		return 0;
16605 
16606 	if (is_pointer_value(env, BPF_REG_0)) {
16607 		verbose(env, "R%d leaks addr as return value\n", BPF_REG_0);
16608 		return -EACCES;
16609 	}
16610 
16611 	if (reg->type != SCALAR_VALUE) {
16612 		verbose(env, "At subprogram exit the register R0 is not a scalar value (%s)\n",
16613 			reg_type_str(env, reg->type));
16614 		return -EINVAL;
16615 	}
16616 
16617 	return 0;
16618 }
16619 
/* Mask with one bit set per caller saved register (bits 0..CALLER_SAVED_REGS-1) */
#define ALL_CALLER_SAVED_REGS (~(~0u << CALLER_SAVED_REGS))
16622 
16623 /* True if do_misc_fixups() replaces calls to helper number 'imm',
16624  * replacement patch is presumed to follow bpf_fastcall contract
16625  * (see mark_fastcall_pattern_for_call() below).
16626  */
16627 bool bpf_verifier_inlines_helper_call(struct bpf_verifier_env *env, s32 imm)
16628 {
16629 	switch (imm) {
16630 #ifdef CONFIG_X86_64
16631 	case BPF_FUNC_get_smp_processor_id:
16632 #ifdef CONFIG_SMP
16633 	case BPF_FUNC_get_current_task_btf:
16634 	case BPF_FUNC_get_current_task:
16635 #endif
16636 		return env->prog->jit_requested && bpf_jit_supports_percpu_insn();
16637 #endif
16638 	default:
16639 		return false;
16640 	}
16641 }
16642 
16643 /* If @call is a kfunc or helper call, fills @cs and returns true,
16644  * otherwise returns false.
16645  */
16646 bool bpf_get_call_summary(struct bpf_verifier_env *env, struct bpf_insn *call,
16647 			  struct bpf_call_summary *cs)
16648 {
16649 	struct bpf_kfunc_call_arg_meta meta;
16650 	const struct bpf_func_proto *fn;
16651 	int i;
16652 
16653 	if (bpf_helper_call(call)) {
16654 
16655 		if (bpf_get_helper_proto(env, call->imm, &fn) < 0)
16656 			/* error would be reported later */
16657 			return false;
16658 		cs->fastcall = fn->allow_fastcall &&
16659 			       (bpf_verifier_inlines_helper_call(env, call->imm) ||
16660 				bpf_jit_inlines_helper_call(call->imm));
16661 		cs->is_void = fn->ret_type == RET_VOID;
16662 		cs->num_params = 0;
16663 		for (i = 0; i < ARRAY_SIZE(fn->arg_type); ++i) {
16664 			if (fn->arg_type[i] == ARG_DONTCARE)
16665 				break;
16666 			cs->num_params++;
16667 		}
16668 		return true;
16669 	}
16670 
16671 	if (bpf_pseudo_kfunc_call(call)) {
16672 		int err;
16673 
16674 		err = bpf_fetch_kfunc_arg_meta(env, call->imm, call->off, &meta);
16675 		if (err < 0)
16676 			/* error would be reported later */
16677 			return false;
16678 		cs->num_params = btf_type_vlen(meta.func_proto);
16679 		cs->fastcall = meta.kfunc_flags & KF_FASTCALL;
16680 		cs->is_void = btf_type_is_void(btf_type_by_id(meta.btf, meta.func_proto->type));
16681 		return true;
16682 	}
16683 
16684 	return false;
16685 }
16686 
16687 /* LLVM define a bpf_fastcall function attribute.
16688  * This attribute means that function scratches only some of
16689  * the caller saved registers defined by ABI.
16690  * For BPF the set of such registers could be defined as follows:
16691  * - R0 is scratched only if function is non-void;
16692  * - R1-R5 are scratched only if corresponding parameter type is defined
16693  *   in the function prototype.
16694  *
16695  * The contract between kernel and clang allows to simultaneously use
16696  * such functions and maintain backwards compatibility with old
16697  * kernels that don't understand bpf_fastcall calls:
16698  *
16699  * - for bpf_fastcall calls clang allocates registers as-if relevant r0-r5
16700  *   registers are not scratched by the call;
16701  *
16702  * - as a post-processing step, clang visits each bpf_fastcall call and adds
16703  *   spill/fill for every live r0-r5;
16704  *
16705  * - stack offsets used for the spill/fill are allocated as lowest
16706  *   stack offsets in whole function and are not used for any other
16707  *   purposes;
16708  *
16709  * - when kernel loads a program, it looks for such patterns
16710  *   (bpf_fastcall function surrounded by spills/fills) and checks if
16711  *   spill/fill stack offsets are used exclusively in fastcall patterns;
16712  *
16713  * - if so, and if verifier or current JIT inlines the call to the
16714  *   bpf_fastcall function (e.g. a helper call), kernel removes unnecessary
16715  *   spill/fill pairs;
16716  *
16717  * - when old kernel loads a program, presence of spill/fill pairs
16718  *   keeps BPF program valid, albeit slightly less efficient.
16719  *
16720  * For example:
16721  *
16722  *   r1 = 1;
16723  *   r2 = 2;
16724  *   *(u64 *)(r10 - 8)  = r1;            r1 = 1;
16725  *   *(u64 *)(r10 - 16) = r2;            r2 = 2;
16726  *   call %[to_be_inlined]         -->   call %[to_be_inlined]
16727  *   r2 = *(u64 *)(r10 - 16);            r0 = r1;
16728  *   r1 = *(u64 *)(r10 - 8);             r0 += r2;
16729  *   r0 = r1;                            exit;
16730  *   r0 += r2;
16731  *   exit;
16732  *
16733  * The purpose of mark_fastcall_pattern_for_call is to:
16734  * - look for such patterns;
16735  * - mark spill and fill instructions in env->insn_aux_data[*].fastcall_pattern;
16736  * - mark set env->insn_aux_data[*].fastcall_spills_num for call instruction;
16737  * - update env->subprog_info[*]->fastcall_stack_off to find an offset
16738  *   at which bpf_fastcall spill/fill stack slots start;
16739  * - update env->subprog_info[*]->keep_fastcall_stack.
16740  *
16741  * The .fastcall_pattern and .fastcall_stack_off are used by
16742  * check_fastcall_stack_contract() to check if every stack access to
16743  * fastcall spill/fill stack slot originates from spill/fill
16744  * instructions, members of fastcall patterns.
16745  *
16746  * If such condition holds true for a subprogram, fastcall patterns could
16747  * be rewritten by remove_fastcall_spills_fills().
16748  * Otherwise bpf_fastcall patterns are not changed in the subprogram
16749  * (code, presumably, generated by an older clang version).
16750  *
16751  * For example, it is *not* safe to remove spill/fill below:
16752  *
16753  *   r1 = 1;
16754  *   *(u64 *)(r10 - 8)  = r1;            r1 = 1;
16755  *   call %[to_be_inlined]         -->   call %[to_be_inlined]
16756  *   r1 = *(u64 *)(r10 - 8);             r0 = *(u64 *)(r10 - 8);  <---- wrong !!!
16757  *   r0 = *(u64 *)(r10 - 8);             r0 += r1;
16758  *   r0 += r1;                           exit;
16759  *   exit;
16760  */
16761 static void mark_fastcall_pattern_for_call(struct bpf_verifier_env *env,
16762 					   struct bpf_subprog_info *subprog,
16763 					   int insn_idx, s16 lowest_off)
16764 {
16765 	struct bpf_insn *insns = env->prog->insnsi, *stx, *ldx;
16766 	struct bpf_insn *call = &env->prog->insnsi[insn_idx];
16767 	u32 clobbered_regs_mask;
16768 	struct bpf_call_summary cs;
16769 	u32 expected_regs_mask;
16770 	s16 off;
16771 	int i;
16772 
16773 	if (!bpf_get_call_summary(env, call, &cs))
16774 		return;
16775 
16776 	/* A bitmask specifying which caller saved registers are clobbered
16777 	 * by a call to a helper/kfunc *as if* this helper/kfunc follows
16778 	 * bpf_fastcall contract:
16779 	 * - includes R0 if function is non-void;
16780 	 * - includes R1-R5 if corresponding parameter has is described
16781 	 *   in the function prototype.
16782 	 */
16783 	clobbered_regs_mask = GENMASK(cs.num_params, cs.is_void ? 1 : 0);
16784 	/* e.g. if helper call clobbers r{0,1}, expect r{2,3,4,5} in the pattern */
16785 	expected_regs_mask = ~clobbered_regs_mask & ALL_CALLER_SAVED_REGS;
16786 
16787 	/* match pairs of form:
16788 	 *
16789 	 * *(u64 *)(r10 - Y) = rX   (where Y % 8 == 0)
16790 	 * ...
16791 	 * call %[to_be_inlined]
16792 	 * ...
16793 	 * rX = *(u64 *)(r10 - Y)
16794 	 */
16795 	for (i = 1, off = lowest_off; i <= ARRAY_SIZE(caller_saved); ++i, off += BPF_REG_SIZE) {
16796 		if (insn_idx - i < 0 || insn_idx + i >= env->prog->len)
16797 			break;
16798 		stx = &insns[insn_idx - i];
16799 		ldx = &insns[insn_idx + i];
16800 		/* must be a stack spill/fill pair */
16801 		if (stx->code != (BPF_STX | BPF_MEM | BPF_DW) ||
16802 		    ldx->code != (BPF_LDX | BPF_MEM | BPF_DW) ||
16803 		    stx->dst_reg != BPF_REG_10 ||
16804 		    ldx->src_reg != BPF_REG_10)
16805 			break;
16806 		/* must be a spill/fill for the same reg */
16807 		if (stx->src_reg != ldx->dst_reg)
16808 			break;
16809 		/* must be one of the previously unseen registers */
16810 		if ((BIT(stx->src_reg) & expected_regs_mask) == 0)
16811 			break;
16812 		/* must be a spill/fill for the same expected offset,
16813 		 * no need to check offset alignment, BPF_DW stack access
16814 		 * is always 8-byte aligned.
16815 		 */
16816 		if (stx->off != off || ldx->off != off)
16817 			break;
16818 		expected_regs_mask &= ~BIT(stx->src_reg);
16819 		env->insn_aux_data[insn_idx - i].fastcall_pattern = 1;
16820 		env->insn_aux_data[insn_idx + i].fastcall_pattern = 1;
16821 	}
16822 	if (i == 1)
16823 		return;
16824 
16825 	/* Conditionally set 'fastcall_spills_num' to allow forward
16826 	 * compatibility when more helper functions are marked as
16827 	 * bpf_fastcall at compile time than current kernel supports, e.g:
16828 	 *
16829 	 *   1: *(u64 *)(r10 - 8) = r1
16830 	 *   2: call A                  ;; assume A is bpf_fastcall for current kernel
16831 	 *   3: r1 = *(u64 *)(r10 - 8)
16832 	 *   4: *(u64 *)(r10 - 8) = r1
16833 	 *   5: call B                  ;; assume B is not bpf_fastcall for current kernel
16834 	 *   6: r1 = *(u64 *)(r10 - 8)
16835 	 *
16836 	 * There is no need to block bpf_fastcall rewrite for such program.
16837 	 * Set 'fastcall_pattern' for both calls to keep check_fastcall_stack_contract() happy,
16838 	 * don't set 'fastcall_spills_num' for call B so that remove_fastcall_spills_fills()
16839 	 * does not remove spill/fill pair {4,6}.
16840 	 */
16841 	if (cs.fastcall)
16842 		env->insn_aux_data[insn_idx].fastcall_spills_num = i - 1;
16843 	else
16844 		subprog->keep_fastcall_stack = 1;
16845 	subprog->fastcall_stack_off = min(subprog->fastcall_stack_off, off);
16846 }
16847 
16848 static int mark_fastcall_patterns(struct bpf_verifier_env *env)
16849 {
16850 	struct bpf_subprog_info *subprog = env->subprog_info;
16851 	struct bpf_insn *insn;
16852 	s16 lowest_off;
16853 	int s, i;
16854 
16855 	for (s = 0; s < env->subprog_cnt; ++s, ++subprog) {
16856 		/* find lowest stack spill offset used in this subprog */
16857 		lowest_off = 0;
16858 		for (i = subprog->start; i < (subprog + 1)->start; ++i) {
16859 			insn = env->prog->insnsi + i;
16860 			if (insn->code != (BPF_STX | BPF_MEM | BPF_DW) ||
16861 			    insn->dst_reg != BPF_REG_10)
16862 				continue;
16863 			lowest_off = min(lowest_off, insn->off);
16864 		}
16865 		/* use this offset to find fastcall patterns */
16866 		for (i = subprog->start; i < (subprog + 1)->start; ++i) {
16867 			insn = env->prog->insnsi + i;
16868 			if (insn->code != (BPF_JMP | BPF_CALL))
16869 				continue;
16870 			mark_fastcall_pattern_for_call(env, subprog, i, lowest_off);
16871 		}
16872 	}
16873 	return 0;
16874 }
16875 
16876 static void adjust_btf_func(struct bpf_verifier_env *env)
16877 {
16878 	struct bpf_prog_aux *aux = env->prog->aux;
16879 	int i;
16880 
16881 	if (!aux->func_info)
16882 		return;
16883 
16884 	/* func_info is not available for hidden subprogs */
16885 	for (i = 0; i < env->subprog_cnt - env->hidden_subprog_cnt; i++)
16886 		aux->func_info[i].insn_off = env->subprog_info[i].start;
16887 }
16888 
16889 /* Find id in idset and increment its count, or add new entry */
16890 static void idset_cnt_inc(struct bpf_idset *idset, u32 id)
16891 {
16892 	u32 i;
16893 
16894 	for (i = 0; i < idset->num_ids; i++) {
16895 		if (idset->entries[i].id == id) {
16896 			idset->entries[i].cnt++;
16897 			return;
16898 		}
16899 	}
16900 	/* New id */
16901 	if (idset->num_ids < BPF_ID_MAP_SIZE) {
16902 		idset->entries[idset->num_ids].id = id;
16903 		idset->entries[idset->num_ids].cnt = 1;
16904 		idset->num_ids++;
16905 	}
16906 }
16907 
16908 /* Find id in idset and return its count, or 0 if not found */
16909 static u32 idset_cnt_get(struct bpf_idset *idset, u32 id)
16910 {
16911 	u32 i;
16912 
16913 	for (i = 0; i < idset->num_ids; i++) {
16914 		if (idset->entries[i].id == id)
16915 			return idset->entries[i].cnt;
16916 	}
16917 	return 0;
16918 }
16919 
16920 /*
16921  * Clear singular scalar ids in a state.
16922  * A register with a non-zero id is called singular if no other register shares
16923  * the same base id. Such registers can be treated as independent (id=0).
16924  */
16925 void bpf_clear_singular_ids(struct bpf_verifier_env *env,
16926 			    struct bpf_verifier_state *st)
16927 {
16928 	struct bpf_idset *idset = &env->idset_scratch;
16929 	struct bpf_func_state *func;
16930 	struct bpf_reg_state *reg;
16931 
16932 	idset->num_ids = 0;
16933 
16934 	bpf_for_each_reg_in_vstate(st, func, reg, ({
16935 		if (reg->type != SCALAR_VALUE)
16936 			continue;
16937 		if (!reg->id)
16938 			continue;
16939 		idset_cnt_inc(idset, reg->id & ~BPF_ADD_CONST);
16940 	}));
16941 
16942 	bpf_for_each_reg_in_vstate(st, func, reg, ({
16943 		if (reg->type != SCALAR_VALUE)
16944 			continue;
16945 		if (!reg->id)
16946 			continue;
16947 		if (idset_cnt_get(idset, reg->id & ~BPF_ADD_CONST) == 1)
16948 			clear_scalar_id(reg);
16949 	}));
16950 }
16951 
16952 /* Return true if it's OK to have the same insn return a different type. */
16953 static bool reg_type_mismatch_ok(enum bpf_reg_type type)
16954 {
16955 	switch (base_type(type)) {
16956 	case PTR_TO_CTX:
16957 	case PTR_TO_SOCKET:
16958 	case PTR_TO_SOCK_COMMON:
16959 	case PTR_TO_TCP_SOCK:
16960 	case PTR_TO_XDP_SOCK:
16961 	case PTR_TO_BTF_ID:
16962 	case PTR_TO_ARENA:
16963 		return false;
16964 	default:
16965 		return true;
16966 	}
16967 }
16968 
16969 /* If an instruction was previously used with particular pointer types, then we
16970  * need to be careful to avoid cases such as the below, where it may be ok
16971  * for one branch accessing the pointer, but not ok for the other branch:
16972  *
16973  * R1 = sock_ptr
16974  * goto X;
16975  * ...
16976  * R1 = some_other_valid_ptr;
16977  * goto X;
16978  * ...
16979  * R2 = *(u32 *)(R1 + 0);
16980  */
16981 static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev)
16982 {
16983 	return src != prev && (!reg_type_mismatch_ok(src) ||
16984 			       !reg_type_mismatch_ok(prev));
16985 }
16986 
16987 static bool is_ptr_to_mem_or_btf_id(enum bpf_reg_type type)
16988 {
16989 	switch (base_type(type)) {
16990 	case PTR_TO_MEM:
16991 	case PTR_TO_BTF_ID:
16992 		return true;
16993 	default:
16994 		return false;
16995 	}
16996 }
16997 
16998 static bool is_ptr_to_mem(enum bpf_reg_type type)
16999 {
17000 	return base_type(type) == PTR_TO_MEM;
17001 }
17002 
17003 static int save_aux_ptr_type(struct bpf_verifier_env *env, enum bpf_reg_type type,
17004 			     bool allow_trust_mismatch)
17005 {
17006 	enum bpf_reg_type *prev_type = &env->insn_aux_data[env->insn_idx].ptr_type;
17007 	enum bpf_reg_type merged_type;
17008 
17009 	if (*prev_type == NOT_INIT) {
17010 		/* Saw a valid insn
17011 		 * dst_reg = *(u32 *)(src_reg + off)
17012 		 * save type to validate intersecting paths
17013 		 */
17014 		*prev_type = type;
17015 	} else if (reg_type_mismatch(type, *prev_type)) {
17016 		/* Abuser program is trying to use the same insn
17017 		 * dst_reg = *(u32*) (src_reg + off)
17018 		 * with different pointer types:
17019 		 * src_reg == ctx in one branch and
17020 		 * src_reg == stack|map in some other branch.
17021 		 * Reject it.
17022 		 */
17023 		if (allow_trust_mismatch &&
17024 		    is_ptr_to_mem_or_btf_id(type) &&
17025 		    is_ptr_to_mem_or_btf_id(*prev_type)) {
17026 			/*
17027 			 * Have to support a use case when one path through
17028 			 * the program yields TRUSTED pointer while another
17029 			 * is UNTRUSTED. Fallback to UNTRUSTED to generate
17030 			 * BPF_PROBE_MEM/BPF_PROBE_MEMSX.
17031 			 * Same behavior of MEM_RDONLY flag.
17032 			 */
17033 			if (is_ptr_to_mem(type) || is_ptr_to_mem(*prev_type))
17034 				merged_type = PTR_TO_MEM;
17035 			else
17036 				merged_type = PTR_TO_BTF_ID;
17037 			if ((type & PTR_UNTRUSTED) || (*prev_type & PTR_UNTRUSTED))
17038 				merged_type |= PTR_UNTRUSTED;
17039 			if ((type & MEM_RDONLY) || (*prev_type & MEM_RDONLY))
17040 				merged_type |= MEM_RDONLY;
17041 			*prev_type = merged_type;
17042 		} else {
17043 			verbose(env, "same insn cannot be used with different pointers\n");
17044 			return -EINVAL;
17045 		}
17046 	}
17047 
17048 	return 0;
17049 }
17050 
/* Positive status codes returned by the per-instruction check routines */
enum {
	/* an exit was processed and passed its checks */
	PROCESS_BPF_EXIT = 1,
	/* the routine has already set env->insn_idx to the next instruction */
	INSN_IDX_UPDATED = 2,
};
17055 
17056 static int process_bpf_exit_full(struct bpf_verifier_env *env,
17057 				 bool *do_print_state,
17058 				 bool exception_exit)
17059 {
17060 	struct bpf_func_state *cur_frame = cur_func(env);
17061 
17062 	/* We must do check_reference_leak here before
17063 	 * prepare_func_exit to handle the case when
17064 	 * state->curframe > 0, it may be a callback function,
17065 	 * for which reference_state must match caller reference
17066 	 * state when it exits.
17067 	 */
17068 	int err = check_resource_leak(env, exception_exit,
17069 				      exception_exit || !env->cur_state->curframe,
17070 				      exception_exit ? "bpf_throw" :
17071 				      "BPF_EXIT instruction in main prog");
17072 	if (err)
17073 		return err;
17074 
17075 	/* The side effect of the prepare_func_exit which is
17076 	 * being skipped is that it frees bpf_func_state.
17077 	 * Typically, process_bpf_exit will only be hit with
17078 	 * outermost exit. copy_verifier_state in pop_stack will
17079 	 * handle freeing of any extra bpf_func_state left over
17080 	 * from not processing all nested function exits. We
17081 	 * also skip return code checks as they are not needed
17082 	 * for exceptional exits.
17083 	 */
17084 	if (exception_exit)
17085 		return PROCESS_BPF_EXIT;
17086 
17087 	if (env->cur_state->curframe) {
17088 		/* exit from nested function */
17089 		err = prepare_func_exit(env, &env->insn_idx);
17090 		if (err)
17091 			return err;
17092 		*do_print_state = true;
17093 		return INSN_IDX_UPDATED;
17094 	}
17095 
17096 	/*
17097 	 * Return from a regular global subprogram differs from return
17098 	 * from the main program or async/exception callback.
17099 	 * Main program exit implies return code restrictions
17100 	 * that depend on program type.
17101 	 * Exit from exception callback is equivalent to main program exit.
17102 	 * Exit from async callback implies return code restrictions
17103 	 * that depend on async scheduling mechanism.
17104 	 */
17105 	if (cur_frame->subprogno &&
17106 	    !cur_frame->in_async_callback_fn &&
17107 	    !cur_frame->in_exception_callback_fn)
17108 		err = check_global_subprog_return_code(env);
17109 	else
17110 		err = check_return_code(env, BPF_REG_0, "R0");
17111 	if (err)
17112 		return err;
17113 	return PROCESS_BPF_EXIT;
17114 }
17115 
17116 static int indirect_jump_min_max_index(struct bpf_verifier_env *env,
17117 				       int regno,
17118 				       struct bpf_map *map,
17119 				       u32 *pmin_index, u32 *pmax_index)
17120 {
17121 	struct bpf_reg_state *reg = reg_state(env, regno);
17122 	u64 min_index = reg_umin(reg);
17123 	u64 max_index = reg_umax(reg);
17124 	const u32 size = 8;
17125 
17126 	if (min_index > (u64) U32_MAX * size) {
17127 		verbose(env, "the sum of R%u umin_value %llu is too big\n", regno, reg_umin(reg));
17128 		return -ERANGE;
17129 	}
17130 	if (max_index > (u64) U32_MAX * size) {
17131 		verbose(env, "the sum of R%u umax_value %llu is too big\n", regno, reg_umax(reg));
17132 		return -ERANGE;
17133 	}
17134 
17135 	min_index /= size;
17136 	max_index /= size;
17137 
17138 	if (max_index >= map->max_entries) {
17139 		verbose(env, "R%u points to outside of jump table: [%llu,%llu] max_entries %u\n",
17140 			     regno, min_index, max_index, map->max_entries);
17141 		return -EINVAL;
17142 	}
17143 
17144 	*pmin_index = min_index;
17145 	*pmax_index = max_index;
17146 	return 0;
17147 }
17148 
17149 /* gotox *dst_reg */
17150 static int check_indirect_jump(struct bpf_verifier_env *env, struct bpf_insn *insn)
17151 {
17152 	struct bpf_verifier_state *other_branch;
17153 	struct bpf_reg_state *dst_reg;
17154 	struct bpf_map *map;
17155 	u32 min_index, max_index;
17156 	int err = 0;
17157 	int n;
17158 	int i;
17159 
17160 	dst_reg = reg_state(env, insn->dst_reg);
17161 	if (dst_reg->type != PTR_TO_INSN) {
17162 		verbose(env, "R%d has type %s, expected PTR_TO_INSN\n",
17163 			     insn->dst_reg, reg_type_str(env, dst_reg->type));
17164 		return -EINVAL;
17165 	}
17166 
17167 	map = dst_reg->map_ptr;
17168 	if (verifier_bug_if(!map, env, "R%d has an empty map pointer", insn->dst_reg))
17169 		return -EFAULT;
17170 
17171 	if (verifier_bug_if(map->map_type != BPF_MAP_TYPE_INSN_ARRAY, env,
17172 			    "R%d has incorrect map type %d", insn->dst_reg, map->map_type))
17173 		return -EFAULT;
17174 
17175 	err = indirect_jump_min_max_index(env, insn->dst_reg, map, &min_index, &max_index);
17176 	if (err)
17177 		return err;
17178 
17179 	/* Ensure that the buffer is large enough */
17180 	if (!env->gotox_tmp_buf || env->gotox_tmp_buf->cnt < max_index - min_index + 1) {
17181 		env->gotox_tmp_buf = bpf_iarray_realloc(env->gotox_tmp_buf,
17182 						        max_index - min_index + 1);
17183 		if (!env->gotox_tmp_buf)
17184 			return -ENOMEM;
17185 	}
17186 
17187 	n = bpf_copy_insn_array_uniq(map, min_index, max_index, env->gotox_tmp_buf->items);
17188 	if (n < 0)
17189 		return n;
17190 	if (n == 0) {
17191 		verbose(env, "register R%d doesn't point to any offset in map id=%d\n",
17192 			     insn->dst_reg, map->id);
17193 		return -EINVAL;
17194 	}
17195 
17196 	for (i = 0; i < n - 1; i++) {
17197 		mark_indirect_target(env, env->gotox_tmp_buf->items[i]);
17198 		other_branch = push_stack(env, env->gotox_tmp_buf->items[i],
17199 					  env->insn_idx, env->cur_state->speculative);
17200 		if (IS_ERR(other_branch))
17201 			return PTR_ERR(other_branch);
17202 	}
17203 	env->insn_idx = env->gotox_tmp_buf->items[n-1];
17204 	mark_indirect_target(env, env->insn_idx);
17205 	return INSN_IDX_UPDATED;
17206 }
17207 
17208 static int do_check_insn(struct bpf_verifier_env *env, bool *do_print_state)
17209 {
17210 	int err;
17211 	struct bpf_insn *insn = &env->prog->insnsi[env->insn_idx];
17212 	u8 class = BPF_CLASS(insn->code);
17213 
17214 	switch (class) {
17215 	case BPF_ALU:
17216 	case BPF_ALU64:
17217 		return check_alu_op(env, insn);
17218 
17219 	case BPF_LDX:
17220 		return check_load_mem(env, insn, false,
17221 				      BPF_MODE(insn->code) == BPF_MEMSX,
17222 				      true, "ldx");
17223 
17224 	case BPF_STX:
17225 		if (BPF_MODE(insn->code) == BPF_ATOMIC)
17226 			return check_atomic(env, insn);
17227 		return check_store_reg(env, insn, false);
17228 
17229 	case BPF_ST: {
17230 		/* Handle stack arg write (store immediate) */
17231 		if (is_stack_arg_st(insn)) {
17232 			struct bpf_verifier_state *vstate = env->cur_state;
17233 			struct bpf_func_state *state = vstate->frame[vstate->curframe];
17234 
17235 			return check_stack_arg_write(env, state, insn->off, NULL);
17236 		}
17237 
17238 		enum bpf_reg_type dst_reg_type;
17239 
17240 		err = check_reg_arg(env, insn->dst_reg, SRC_OP);
17241 		if (err)
17242 			return err;
17243 
17244 		dst_reg_type = cur_regs(env)[insn->dst_reg].type;
17245 
17246 		err = check_mem_access(env, env->insn_idx, cur_regs(env) + insn->dst_reg, argno_from_reg(insn->dst_reg),
17247 				       insn->off, BPF_SIZE(insn->code),
17248 				       BPF_WRITE, -1, false, false);
17249 		if (err)
17250 			return err;
17251 
17252 		return save_aux_ptr_type(env, dst_reg_type, false);
17253 	}
17254 	case BPF_JMP:
17255 	case BPF_JMP32: {
17256 		u8 opcode = BPF_OP(insn->code);
17257 
17258 		env->jmps_processed++;
17259 		if (opcode == BPF_CALL) {
17260 			if (env->cur_state->active_locks) {
17261 				if ((insn->src_reg == BPF_REG_0 &&
17262 				     insn->imm != BPF_FUNC_spin_unlock &&
17263 				     insn->imm != BPF_FUNC_kptr_xchg) ||
17264 				    (insn->src_reg == BPF_PSEUDO_KFUNC_CALL &&
17265 				     (insn->off != 0 || !kfunc_spin_allowed(insn->imm)))) {
17266 					verbose(env,
17267 						"function calls are not allowed while holding a lock\n");
17268 					return -EINVAL;
17269 				}
17270 			}
17271 			mark_reg_scratched(env, BPF_REG_0);
17272 			if (bpf_in_stack_arg_cnt(&env->subprog_info[cur_func(env)->subprogno]))
17273 				cur_func(env)->no_stack_arg_load = true;
17274 			if (insn->src_reg == BPF_PSEUDO_CALL)
17275 				return check_func_call(env, insn, &env->insn_idx);
17276 			if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL)
17277 				return check_kfunc_call(env, insn, &env->insn_idx);
17278 			return check_helper_call(env, insn, &env->insn_idx);
17279 		} else if (opcode == BPF_JA) {
17280 			if (BPF_SRC(insn->code) == BPF_X)
17281 				return check_indirect_jump(env, insn);
17282 
17283 			if (class == BPF_JMP)
17284 				env->insn_idx += insn->off + 1;
17285 			else
17286 				env->insn_idx += insn->imm + 1;
17287 			return INSN_IDX_UPDATED;
17288 		} else if (opcode == BPF_EXIT) {
17289 			return process_bpf_exit_full(env, do_print_state, false);
17290 		}
17291 		return check_cond_jmp_op(env, insn, &env->insn_idx);
17292 	}
17293 	case BPF_LD: {
17294 		u8 mode = BPF_MODE(insn->code);
17295 
17296 		if (mode == BPF_ABS || mode == BPF_IND)
17297 			return check_ld_abs(env, insn);
17298 
17299 		if (mode == BPF_IMM) {
17300 			err = check_ld_imm(env, insn);
17301 			if (err)
17302 				return err;
17303 
17304 			env->insn_idx++;
17305 			sanitize_mark_insn_seen(env);
17306 		}
17307 		return 0;
17308 	}
17309 	}
17310 	/* all class values are handled above. silence compiler warning */
17311 	return -EFAULT;
17312 }
17313 
/* Main verification loop.
 *
 * Starting from env->cur_state / env->insn_idx, verify one instruction per
 * iteration. When the current path ends (pruned as "safe", stopped at a
 * nospec on a speculative path, or do_check_insn() asked for
 * PROCESS_BPF_EXIT), the shared tail at the process_bpf_exit label pops the
 * next pending state with pop_stack() and verification continues from there.
 *
 * Returns 0 once pop_stack() reports -ENOENT, otherwise a negative error:
 * -EFAULT for an out-of-range insn index or a failed internal sanity check,
 * -E2BIG when BPF_COMPLEXITY_LIMIT_INSNS is exceeded, -EAGAIN when a signal
 * is pending, or whatever error one of the called helpers returned.
 */
static int do_check(struct bpf_verifier_env *env)
{
	/* passed to pop_stack(); true unless BPF_LOG_LEVEL2 logging is on */
	bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
	struct bpf_verifier_state *state = env->cur_state;
	struct bpf_insn *insns = env->prog->insnsi;
	int insn_cnt = env->prog->len;
	/* set after a state was popped so the "from X to Y" header is printed */
	bool do_print_state = false;
	int prev_insn_idx = -1;

	for (;;) {
		struct bpf_insn *insn;
		struct bpf_insn_aux_data *insn_aux;
		int err;

		/* reset current history entry on each new instruction */
		env->cur_hist_ent = NULL;

		env->prev_insn_idx = prev_insn_idx;
		if (env->insn_idx >= insn_cnt) {
			verbose(env, "invalid insn idx %d insn_cnt %d\n",
				env->insn_idx, insn_cnt);
			return -EFAULT;
		}

		insn = &insns[env->insn_idx];
		insn_aux = &env->insn_aux_data[env->insn_idx];

		/* counts every processed insn across all explored paths */
		if (++env->insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
			verbose(env,
				"BPF program is too large. Processed %d insn\n",
				env->insn_processed);
			return -E2BIG;
		}

		state->last_insn_idx = env->prev_insn_idx;
		state->insn_idx = env->insn_idx;

		if (bpf_is_prune_point(env, env->insn_idx)) {
			err = bpf_is_state_visited(env, env->insn_idx);
			if (err < 0)
				return err;
			if (err == 1) {
				/* found equivalent state, can prune the search */
				if (env->log.level & BPF_LOG_LEVEL) {
					if (do_print_state)
						verbose(env, "\nfrom %d to %d%s: safe\n",
							env->prev_insn_idx, env->insn_idx,
							env->cur_state->speculative ?
							" (speculative execution)" : "");
					else
						verbose(env, "%d: safe\n", env->insn_idx);
				}
				goto process_bpf_exit;
			}
		}

		if (bpf_is_jmp_point(env, env->insn_idx)) {
			err = bpf_push_jmp_history(env, state, 0, 0, 0, 0);
			if (err)
				return err;
		}

		/* let a pending signal abort the (potentially long) verification */
		if (signal_pending(current))
			return -EAGAIN;

		if (need_resched())
			cond_resched();

		if (env->log.level & BPF_LOG_LEVEL2 && do_print_state) {
			verbose(env, "\nfrom %d to %d%s:",
				env->prev_insn_idx, env->insn_idx,
				env->cur_state->speculative ?
				" (speculative execution)" : "");
			print_verifier_state(env, state, state->curframe, true);
			do_print_state = false;
		}

		if (env->log.level & BPF_LOG_LEVEL) {
			if (verifier_state_scratched(env))
				print_insn_state(env, state, state->curframe);

			verbose_linfo(env, env->insn_idx, "; ");
			env->prev_log_pos = env->log.end_pos;
			verbose(env, "%d: ", env->insn_idx);
			bpf_verbose_insn(env, insn);
			/* length of the "<idx>: <insn>" text that was just logged */
			env->prev_insn_print_pos = env->log.end_pos - env->prev_log_pos;
			env->prev_log_pos = env->log.end_pos;
		}

		if (bpf_prog_is_offloaded(env->prog->aux)) {
			err = bpf_prog_offload_verify_insn(env, env->insn_idx,
							   env->prev_insn_idx);
			if (err)
				return err;
		}

		sanitize_mark_insn_seen(env);
		/* remembered here because do_check_insn() may move env->insn_idx */
		prev_insn_idx = env->insn_idx;

		/* Sanity check: precomputed constants must match verifier state.
		 * Only registers flagged in const_reg_mask that currently hold a
		 * constant scalar are compared, and only the low 32 bits.
		 */
		if (!state->speculative && insn_aux->const_reg_mask) {
			struct bpf_reg_state *regs = cur_regs(env);
			u16 mask = insn_aux->const_reg_mask;

			for (int r = 0; r < ARRAY_SIZE(insn_aux->const_reg_vals); r++) {
				u32 cval = insn_aux->const_reg_vals[r];

				if (!(mask & BIT(r)))
					continue;
				if (regs[r].type != SCALAR_VALUE)
					continue;
				if (!tnum_is_const(regs[r].var_off))
					continue;
				if (verifier_bug_if((u32)regs[r].var_off.value != cval,
						    env, "const R%d: %u != %llu",
						    r, cval, regs[r].var_off.value))
					return -EFAULT;
			}
		}

		/* Reduce verification complexity by stopping speculative path
		 * verification when a nospec is encountered.
		 */
		if (state->speculative && insn_aux->nospec)
			goto process_bpf_exit;

		err = do_check_insn(env, &do_print_state);
		if (error_recoverable_with_nospec(err) && state->speculative) {
			/* Prevent this speculative path from ever reaching the
			 * insn that would have been unsafe to execute.
			 */
			insn_aux->nospec = true;
			/* If it was an ADD/SUB insn, potentially remove any
			 * markings for alu sanitization.
			 */
			insn_aux->alu_state = 0;
			goto process_bpf_exit;
		} else if (err < 0) {
			return err;
		} else if (err == PROCESS_BPF_EXIT) {
			goto process_bpf_exit;
		} else if (err == INSN_IDX_UPDATED) {
			/* env->insn_idx was already moved; nothing to do here */
		} else if (err == 0) {
			env->insn_idx++;
		}

		if (state->speculative && insn_aux->nospec_result) {
			/* If we are on a path that performed a jump-op, this
			 * may skip a nospec patched-in after the jump. This can
			 * currently never happen because nospec_result is only
			 * used for the write-ops
			 * `*(size*)(dst_reg+off)=src_reg|imm32` and helper
			 * calls. These must never skip the following insn
			 * (i.e., bpf_insn_successors()'s opcode_info.can_jump
			 * is false). Still, add a warning to document this in
			 * case nospec_result is used elsewhere in the future.
			 *
			 * All non-branch instructions have a single
			 * fall-through edge. For these, nospec_result should
			 * already work.
			 */
			if (verifier_bug_if((BPF_CLASS(insn->code) == BPF_JMP ||
					     BPF_CLASS(insn->code) == BPF_JMP32) &&
					    BPF_OP(insn->code) != BPF_CALL, env,
					    "speculation barrier after jump instruction may not have the desired effect"))
				return -EFAULT;
			/* Shared "current path is finished" tail. The label lives
			 * inside this if-body: the gotos above jump straight into
			 * it, while normal fall-through only gets here when the
			 * speculative/nospec_result condition held.
			 */
process_bpf_exit:
			mark_verifier_state_scratched(env);
			err = bpf_update_branch_counts(env, env->cur_state);
			if (err)
				return err;
			err = pop_stack(env, &prev_insn_idx, &env->insn_idx,
					pop_log);
			if (err < 0) {
				/* -ENOENT ends the loop with success; anything
				 * else is a real error
				 */
				if (err != -ENOENT)
					return err;
				break;
			} else {
				do_print_state = true;
				continue;
			}
		}
	}

	return 0;
}
17500 
17501 static int find_btf_percpu_datasec(struct btf *btf)
17502 {
17503 	const struct btf_type *t;
17504 	const char *tname;
17505 	int i, n;
17506 
17507 	/*
17508 	 * Both vmlinux and module each have their own ".data..percpu"
17509 	 * DATASECs in BTF. So for module's case, we need to skip vmlinux BTF
17510 	 * types to look at only module's own BTF types.
17511 	 */
17512 	n = btf_nr_types(btf);
17513 	for (i = btf_named_start_id(btf, true); i < n; i++) {
17514 		t = btf_type_by_id(btf, i);
17515 		if (BTF_INFO_KIND(t->info) != BTF_KIND_DATASEC)
17516 			continue;
17517 
17518 		tname = btf_name_by_offset(btf, t->name_off);
17519 		if (!strcmp(tname, ".data..percpu"))
17520 			return i;
17521 	}
17522 
17523 	return -ENOENT;
17524 }
17525 
17526 /*
17527  * Add btf to the env->used_btfs array. If needed, refcount the
17528  * corresponding kernel module. To simplify caller's logic
17529  * in case of error or if btf was added before the function
17530  * decreases the btf refcount.
17531  */
17532 static int __add_used_btf(struct bpf_verifier_env *env, struct btf *btf)
17533 {
17534 	struct btf_mod_pair *btf_mod;
17535 	int ret = 0;
17536 	int i;
17537 
17538 	/* check whether we recorded this BTF (and maybe module) already */
17539 	for (i = 0; i < env->used_btf_cnt; i++)
17540 		if (env->used_btfs[i].btf == btf)
17541 			goto ret_put;
17542 
17543 	if (env->used_btf_cnt >= MAX_USED_BTFS) {
17544 		verbose(env, "The total number of btfs per program has reached the limit of %u\n",
17545 			MAX_USED_BTFS);
17546 		ret = -E2BIG;
17547 		goto ret_put;
17548 	}
17549 
17550 	btf_mod = &env->used_btfs[env->used_btf_cnt];
17551 	btf_mod->btf = btf;
17552 	btf_mod->module = NULL;
17553 
17554 	/* if we reference variables from kernel module, bump its refcount */
17555 	if (btf_is_module(btf)) {
17556 		btf_mod->module = btf_try_get_module(btf);
17557 		if (!btf_mod->module) {
17558 			ret = -ENXIO;
17559 			goto ret_put;
17560 		}
17561 	}
17562 
17563 	env->used_btf_cnt++;
17564 	return 0;
17565 
17566 ret_put:
17567 	/* Either error or this BTF was already added */
17568 	btf_put(btf);
17569 	return ret;
17570 }
17571 
17572 /* replace pseudo btf_id with kernel symbol address */
17573 static int __check_pseudo_btf_id(struct bpf_verifier_env *env,
17574 				 struct bpf_insn *insn,
17575 				 struct bpf_insn_aux_data *aux,
17576 				 struct btf *btf)
17577 {
17578 	const struct btf_var_secinfo *vsi;
17579 	const struct btf_type *datasec;
17580 	const struct btf_type *t;
17581 	const char *sym_name;
17582 	bool percpu = false;
17583 	u32 type, id = insn->imm;
17584 	s32 datasec_id;
17585 	u64 addr;
17586 	int i;
17587 
17588 	t = btf_type_by_id(btf, id);
17589 	if (!t) {
17590 		verbose(env, "ldimm64 insn specifies invalid btf_id %d.\n", id);
17591 		return -ENOENT;
17592 	}
17593 
17594 	if (!btf_type_is_var(t) && !btf_type_is_func(t)) {
17595 		verbose(env, "pseudo btf_id %d in ldimm64 isn't KIND_VAR or KIND_FUNC\n", id);
17596 		return -EINVAL;
17597 	}
17598 
17599 	sym_name = btf_name_by_offset(btf, t->name_off);
17600 	addr = kallsyms_lookup_name(sym_name);
17601 	if (!addr) {
17602 		verbose(env, "ldimm64 failed to find the address for kernel symbol '%s'.\n",
17603 			sym_name);
17604 		return -ENOENT;
17605 	}
17606 	insn[0].imm = (u32)addr;
17607 	insn[1].imm = addr >> 32;
17608 
17609 	if (btf_type_is_func(t)) {
17610 		aux->btf_var.reg_type = PTR_TO_MEM | MEM_RDONLY;
17611 		aux->btf_var.mem_size = 0;
17612 		return 0;
17613 	}
17614 
17615 	datasec_id = find_btf_percpu_datasec(btf);
17616 	if (datasec_id > 0) {
17617 		datasec = btf_type_by_id(btf, datasec_id);
17618 		for_each_vsi(i, datasec, vsi) {
17619 			if (vsi->type == id) {
17620 				percpu = true;
17621 				break;
17622 			}
17623 		}
17624 	}
17625 
17626 	type = t->type;
17627 	t = btf_type_skip_modifiers(btf, type, NULL);
17628 	if (percpu) {
17629 		aux->btf_var.reg_type = PTR_TO_BTF_ID | MEM_PERCPU;
17630 		aux->btf_var.btf = btf;
17631 		aux->btf_var.btf_id = type;
17632 	} else if (!btf_type_is_struct(t)) {
17633 		const struct btf_type *ret;
17634 		const char *tname;
17635 		u32 tsize;
17636 
17637 		/* resolve the type size of ksym. */
17638 		ret = btf_resolve_size(btf, t, &tsize);
17639 		if (IS_ERR(ret)) {
17640 			tname = btf_name_by_offset(btf, t->name_off);
17641 			verbose(env, "ldimm64 unable to resolve the size of type '%s': %ld\n",
17642 				tname, PTR_ERR(ret));
17643 			return -EINVAL;
17644 		}
17645 		aux->btf_var.reg_type = PTR_TO_MEM | MEM_RDONLY;
17646 		aux->btf_var.mem_size = tsize;
17647 	} else {
17648 		aux->btf_var.reg_type = PTR_TO_BTF_ID;
17649 		aux->btf_var.btf = btf;
17650 		aux->btf_var.btf_id = type;
17651 	}
17652 
17653 	return 0;
17654 }
17655 
17656 static int check_pseudo_btf_id(struct bpf_verifier_env *env,
17657 			       struct bpf_insn *insn,
17658 			       struct bpf_insn_aux_data *aux)
17659 {
17660 	struct btf *btf;
17661 	int btf_fd;
17662 	int err;
17663 
17664 	btf_fd = insn[1].imm;
17665 	if (btf_fd) {
17666 		btf = btf_get_by_fd(btf_fd);
17667 		if (IS_ERR(btf)) {
17668 			verbose(env, "invalid module BTF object FD specified.\n");
17669 			return -EINVAL;
17670 		}
17671 	} else {
17672 		if (!btf_vmlinux) {
17673 			verbose(env, "kernel is missing BTF, make sure CONFIG_DEBUG_INFO_BTF=y is specified in Kconfig.\n");
17674 			return -EINVAL;
17675 		}
17676 		btf_get(btf_vmlinux);
17677 		btf = btf_vmlinux;
17678 	}
17679 
17680 	err = __check_pseudo_btf_id(env, insn, aux, btf);
17681 	if (err) {
17682 		btf_put(btf);
17683 		return err;
17684 	}
17685 
17686 	return __add_used_btf(env, btf);
17687 }
17688 
17689 static bool is_tracing_prog_type(enum bpf_prog_type type)
17690 {
17691 	switch (type) {
17692 	case BPF_PROG_TYPE_KPROBE:
17693 	case BPF_PROG_TYPE_TRACEPOINT:
17694 	case BPF_PROG_TYPE_PERF_EVENT:
17695 	case BPF_PROG_TYPE_RAW_TRACEPOINT:
17696 	case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
17697 		return true;
17698 	default:
17699 		return false;
17700 	}
17701 }
17702 
17703 static bool bpf_map_is_cgroup_storage(struct bpf_map *map)
17704 {
17705 	return (map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE ||
17706 		map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
17707 }
17708 
17709 static int check_map_prog_compatibility(struct bpf_verifier_env *env,
17710 					struct bpf_map *map,
17711 					struct bpf_prog *prog)
17712 
17713 {
17714 	enum bpf_prog_type prog_type = resolve_prog_type(prog);
17715 
17716 	if (map->excl_prog_sha &&
17717 	    memcmp(map->excl_prog_sha, prog->digest, SHA256_DIGEST_SIZE)) {
17718 		verbose(env, "program's hash doesn't match map's excl_prog_hash\n");
17719 		return -EACCES;
17720 	}
17721 
17722 	if (btf_record_has_field(map->record, BPF_LIST_HEAD) ||
17723 	    btf_record_has_field(map->record, BPF_RB_ROOT)) {
17724 		if (is_tracing_prog_type(prog_type)) {
17725 			verbose(env, "tracing progs cannot use bpf_{list_head,rb_root} yet\n");
17726 			return -EINVAL;
17727 		}
17728 	}
17729 
17730 	if (btf_record_has_field(map->record, BPF_SPIN_LOCK | BPF_RES_SPIN_LOCK)) {
17731 		if (prog_type == BPF_PROG_TYPE_SOCKET_FILTER) {
17732 			verbose(env, "socket filter progs cannot use bpf_spin_lock yet\n");
17733 			return -EINVAL;
17734 		}
17735 
17736 		if (is_tracing_prog_type(prog_type)) {
17737 			verbose(env, "tracing progs cannot use bpf_spin_lock yet\n");
17738 			return -EINVAL;
17739 		}
17740 	}
17741 
17742 	if ((bpf_prog_is_offloaded(prog->aux) || bpf_map_is_offloaded(map)) &&
17743 	    !bpf_offload_prog_map_match(prog, map)) {
17744 		verbose(env, "offload device mismatch between prog and map\n");
17745 		return -EINVAL;
17746 	}
17747 
17748 	if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
17749 		verbose(env, "bpf_struct_ops map cannot be used in prog\n");
17750 		return -EINVAL;
17751 	}
17752 
17753 	if (prog->sleepable)
17754 		switch (map->map_type) {
17755 		case BPF_MAP_TYPE_HASH:
17756 		case BPF_MAP_TYPE_RHASH:
17757 		case BPF_MAP_TYPE_LRU_HASH:
17758 		case BPF_MAP_TYPE_ARRAY:
17759 		case BPF_MAP_TYPE_PERCPU_HASH:
17760 		case BPF_MAP_TYPE_PERCPU_ARRAY:
17761 		case BPF_MAP_TYPE_LRU_PERCPU_HASH:
17762 		case BPF_MAP_TYPE_LPM_TRIE:
17763 		case BPF_MAP_TYPE_ARRAY_OF_MAPS:
17764 		case BPF_MAP_TYPE_HASH_OF_MAPS:
17765 		case BPF_MAP_TYPE_RINGBUF:
17766 		case BPF_MAP_TYPE_USER_RINGBUF:
17767 		case BPF_MAP_TYPE_INODE_STORAGE:
17768 		case BPF_MAP_TYPE_SK_STORAGE:
17769 		case BPF_MAP_TYPE_TASK_STORAGE:
17770 		case BPF_MAP_TYPE_CGRP_STORAGE:
17771 		case BPF_MAP_TYPE_QUEUE:
17772 		case BPF_MAP_TYPE_STACK:
17773 		case BPF_MAP_TYPE_ARENA:
17774 		case BPF_MAP_TYPE_INSN_ARRAY:
17775 		case BPF_MAP_TYPE_PROG_ARRAY:
17776 			break;
17777 		default:
17778 			verbose(env,
17779 				"Sleepable programs can only use array, hash, ringbuf and local storage maps\n");
17780 			return -EINVAL;
17781 		}
17782 
17783 	if (bpf_map_is_cgroup_storage(map) &&
17784 	    bpf_cgroup_storage_assign(env->prog->aux, map)) {
17785 		verbose(env, "only one cgroup storage of each type is allowed\n");
17786 		return -EBUSY;
17787 	}
17788 
17789 	if (map->map_type == BPF_MAP_TYPE_ARENA) {
17790 		if (env->prog->aux->arena) {
17791 			verbose(env, "Only one arena per program\n");
17792 			return -EBUSY;
17793 		}
17794 		if (!env->allow_ptr_leaks || !env->bpf_capable) {
17795 			verbose(env, "CAP_BPF and CAP_PERFMON are required to use arena\n");
17796 			return -EPERM;
17797 		}
17798 		if (!env->prog->jit_requested) {
17799 			verbose(env, "JIT is required to use arena\n");
17800 			return -EOPNOTSUPP;
17801 		}
17802 		if (!bpf_jit_supports_arena()) {
17803 			verbose(env, "JIT doesn't support arena\n");
17804 			return -EOPNOTSUPP;
17805 		}
17806 		env->prog->aux->arena = (void *)map;
17807 		if (!bpf_arena_get_user_vm_start(env->prog->aux->arena)) {
17808 			verbose(env, "arena's user address must be set via map_extra or mmap()\n");
17809 			return -EINVAL;
17810 		}
17811 	}
17812 
17813 	return 0;
17814 }
17815 
17816 static int __add_used_map(struct bpf_verifier_env *env, struct bpf_map *map)
17817 {
17818 	int i, err;
17819 
17820 	/* check whether we recorded this map already */
17821 	for (i = 0; i < env->used_map_cnt; i++)
17822 		if (env->used_maps[i] == map)
17823 			return i;
17824 
17825 	if (env->used_map_cnt >= MAX_USED_MAPS) {
17826 		verbose(env, "The total number of maps per program has reached the limit of %u\n",
17827 			MAX_USED_MAPS);
17828 		return -E2BIG;
17829 	}
17830 
17831 	err = check_map_prog_compatibility(env, map, env->prog);
17832 	if (err)
17833 		return err;
17834 
17835 	if (env->prog->sleepable)
17836 		atomic64_inc(&map->sleepable_refcnt);
17837 
17838 	/* hold the map. If the program is rejected by verifier,
17839 	 * the map will be released by release_maps() or it
17840 	 * will be used by the valid program until it's unloaded
17841 	 * and all maps are released in bpf_free_used_maps()
17842 	 */
17843 	bpf_map_inc(map);
17844 
17845 	env->used_maps[env->used_map_cnt++] = map;
17846 
17847 	if (map->map_type == BPF_MAP_TYPE_INSN_ARRAY) {
17848 		err = bpf_insn_array_init(map, env->prog);
17849 		if (err) {
17850 			verbose(env, "Failed to properly initialize insn array\n");
17851 			return err;
17852 		}
17853 		env->insn_array_maps[env->insn_array_map_cnt++] = map;
17854 	}
17855 
17856 	return env->used_map_cnt - 1;
17857 }
17858 
17859 /* Add map behind fd to used maps list, if it's not already there, and return
17860  * its index.
17861  * Returns <0 on error, or >= 0 index, on success.
17862  */
17863 static int add_used_map(struct bpf_verifier_env *env, int fd)
17864 {
17865 	struct bpf_map *map;
17866 	CLASS(fd, f)(fd);
17867 
17868 	map = __bpf_map_get(f);
17869 	if (IS_ERR(map)) {
17870 		verbose(env, "fd %d is not pointing to valid bpf_map\n", fd);
17871 		return PTR_ERR(map);
17872 	}
17873 
17874 	return __add_used_map(env, map);
17875 }
17876 
17877 static int check_alu_fields(struct bpf_verifier_env *env, struct bpf_insn *insn)
17878 {
17879 	u8 class = BPF_CLASS(insn->code);
17880 	u8 opcode = BPF_OP(insn->code);
17881 
17882 	switch (opcode) {
17883 	case BPF_NEG:
17884 		if (BPF_SRC(insn->code) != BPF_K || insn->src_reg != BPF_REG_0 ||
17885 		    insn->off != 0 || insn->imm != 0) {
17886 			verbose(env, "BPF_NEG uses reserved fields\n");
17887 			return -EINVAL;
17888 		}
17889 		return 0;
17890 	case BPF_END:
17891 		if (insn->src_reg != BPF_REG_0 || insn->off != 0 ||
17892 		    (insn->imm != 16 && insn->imm != 32 && insn->imm != 64) ||
17893 		    (class == BPF_ALU64 && BPF_SRC(insn->code) != BPF_TO_LE)) {
17894 			verbose(env, "BPF_END uses reserved fields\n");
17895 			return -EINVAL;
17896 		}
17897 		return 0;
17898 	case BPF_MOV:
17899 		if (BPF_SRC(insn->code) == BPF_X) {
17900 			if (class == BPF_ALU) {
17901 				if ((insn->off != 0 && insn->off != 8 && insn->off != 16) ||
17902 				    insn->imm) {
17903 					verbose(env, "BPF_MOV uses reserved fields\n");
17904 					return -EINVAL;
17905 				}
17906 			} else if (insn->off == BPF_ADDR_SPACE_CAST) {
17907 				if (insn->imm != 1 && insn->imm != 1u << 16) {
17908 					verbose(env, "addr_space_cast insn can only convert between address space 1 and 0\n");
17909 					return -EINVAL;
17910 				}
17911 			} else if ((insn->off != 0 && insn->off != 8 &&
17912 				    insn->off != 16 && insn->off != 32) || insn->imm) {
17913 				verbose(env, "BPF_MOV uses reserved fields\n");
17914 				return -EINVAL;
17915 			}
17916 		} else if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
17917 			verbose(env, "BPF_MOV uses reserved fields\n");
17918 			return -EINVAL;
17919 		}
17920 		return 0;
17921 	case BPF_ADD:
17922 	case BPF_SUB:
17923 	case BPF_AND:
17924 	case BPF_OR:
17925 	case BPF_XOR:
17926 	case BPF_LSH:
17927 	case BPF_RSH:
17928 	case BPF_ARSH:
17929 	case BPF_MUL:
17930 	case BPF_DIV:
17931 	case BPF_MOD:
17932 		if (BPF_SRC(insn->code) == BPF_X) {
17933 			if (insn->imm != 0 || (insn->off != 0 && insn->off != 1) ||
17934 			    (insn->off == 1 && opcode != BPF_MOD && opcode != BPF_DIV)) {
17935 				verbose(env, "BPF_ALU uses reserved fields\n");
17936 				return -EINVAL;
17937 			}
17938 		} else if (insn->src_reg != BPF_REG_0 ||
17939 			   (insn->off != 0 && insn->off != 1) ||
17940 			   (insn->off == 1 && opcode != BPF_MOD && opcode != BPF_DIV)) {
17941 			verbose(env, "BPF_ALU uses reserved fields\n");
17942 			return -EINVAL;
17943 		}
17944 		return 0;
17945 	default:
17946 		verbose(env, "invalid BPF_ALU opcode %x\n", opcode);
17947 		return -EINVAL;
17948 	}
17949 }
17950 
17951 static int check_jmp_fields(struct bpf_verifier_env *env, struct bpf_insn *insn)
17952 {
17953 	u8 class = BPF_CLASS(insn->code);
17954 	u8 opcode = BPF_OP(insn->code);
17955 
17956 	switch (opcode) {
17957 	case BPF_CALL:
17958 		if (BPF_SRC(insn->code) != BPF_K ||
17959 		    (insn->src_reg != BPF_PSEUDO_KFUNC_CALL && insn->off != 0) ||
17960 		    (insn->src_reg != BPF_REG_0 && insn->src_reg != BPF_PSEUDO_CALL &&
17961 		     insn->src_reg != BPF_PSEUDO_KFUNC_CALL) ||
17962 		    insn->dst_reg != BPF_REG_0 || class == BPF_JMP32) {
17963 			verbose(env, "BPF_CALL uses reserved fields\n");
17964 			return -EINVAL;
17965 		}
17966 		return 0;
17967 	case BPF_JA:
17968 		if (BPF_SRC(insn->code) == BPF_X) {
17969 			if (insn->src_reg != BPF_REG_0 || insn->imm != 0 || insn->off != 0) {
17970 				verbose(env, "BPF_JA|BPF_X uses reserved fields\n");
17971 				return -EINVAL;
17972 			}
17973 		} else if (insn->src_reg != BPF_REG_0 || insn->dst_reg != BPF_REG_0 ||
17974 			   (class == BPF_JMP && insn->imm != 0) ||
17975 			   (class == BPF_JMP32 && insn->off != 0)) {
17976 			verbose(env, "BPF_JA uses reserved fields\n");
17977 			return -EINVAL;
17978 		}
17979 		return 0;
17980 	case BPF_EXIT:
17981 		if (BPF_SRC(insn->code) != BPF_K || insn->imm != 0 ||
17982 		    insn->src_reg != BPF_REG_0 || insn->dst_reg != BPF_REG_0 ||
17983 		    class == BPF_JMP32) {
17984 			verbose(env, "BPF_EXIT uses reserved fields\n");
17985 			return -EINVAL;
17986 		}
17987 		return 0;
17988 	case BPF_JCOND:
17989 		if (insn->code != (BPF_JMP | BPF_JCOND) || insn->src_reg != BPF_MAY_GOTO ||
17990 		    insn->dst_reg || insn->imm) {
17991 			verbose(env, "invalid may_goto imm %d\n", insn->imm);
17992 			return -EINVAL;
17993 		}
17994 		return 0;
17995 	default:
17996 		if (BPF_SRC(insn->code) == BPF_X) {
17997 			if (insn->imm != 0) {
17998 				verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
17999 				return -EINVAL;
18000 			}
18001 		} else if (insn->src_reg != BPF_REG_0) {
18002 			verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
18003 			return -EINVAL;
18004 		}
18005 		return 0;
18006 	}
18007 }
18008 
18009 static int check_insn_fields(struct bpf_verifier_env *env, struct bpf_insn *insn)
18010 {
18011 	switch (BPF_CLASS(insn->code)) {
18012 	case BPF_ALU:
18013 	case BPF_ALU64:
18014 		return check_alu_fields(env, insn);
18015 	case BPF_LDX:
18016 		if ((BPF_MODE(insn->code) != BPF_MEM && BPF_MODE(insn->code) != BPF_MEMSX) ||
18017 		    insn->imm != 0) {
18018 			verbose(env, "BPF_LDX uses reserved fields\n");
18019 			return -EINVAL;
18020 		}
18021 		return 0;
18022 	case BPF_STX:
18023 		if (BPF_MODE(insn->code) == BPF_ATOMIC)
18024 			return 0;
18025 		if (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0) {
18026 			verbose(env, "BPF_STX uses reserved fields\n");
18027 			return -EINVAL;
18028 		}
18029 		return 0;
18030 	case BPF_ST:
18031 		if (BPF_MODE(insn->code) != BPF_MEM || insn->src_reg != BPF_REG_0) {
18032 			verbose(env, "BPF_ST uses reserved fields\n");
18033 			return -EINVAL;
18034 		}
18035 		return 0;
18036 	case BPF_JMP:
18037 	case BPF_JMP32:
18038 		return check_jmp_fields(env, insn);
18039 	case BPF_LD: {
18040 		u8 mode = BPF_MODE(insn->code);
18041 
18042 		if (mode == BPF_ABS || mode == BPF_IND) {
18043 			if (insn->dst_reg != BPF_REG_0 || insn->off != 0 ||
18044 			    BPF_SIZE(insn->code) == BPF_DW ||
18045 			    (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) {
18046 				verbose(env, "BPF_LD_[ABS|IND] uses reserved fields\n");
18047 				return -EINVAL;
18048 			}
18049 		} else if (mode != BPF_IMM) {
18050 			verbose(env, "invalid BPF_LD mode\n");
18051 			return -EINVAL;
18052 		}
18053 		return 0;
18054 	}
18055 	default:
18056 		verbose(env, "unknown insn class %d\n", BPF_CLASS(insn->code));
18057 		return -EINVAL;
18058 	}
18059 }
18060 
18061 /*
18062  * Check that insns are sane and rewrite pseudo imm in ld_imm64 instructions:
18063  *
18064  * 1. if it accesses map FD, replace it with actual map pointer.
18065  * 2. if it accesses btf_id of a VAR, replace it with pointer to the var.
18066  *
18067  * NOTE: btf_vmlinux is required for converting pseudo btf_id.
18068  */
18069 static int check_and_resolve_insns(struct bpf_verifier_env *env)
18070 {
18071 	struct bpf_insn *insn = env->prog->insnsi;
18072 	int insn_cnt = env->prog->len;
18073 	int i, err;
18074 
18075 	err = bpf_prog_calc_tag(env->prog);
18076 	if (err)
18077 		return err;
18078 
18079 	for (i = 0; i < insn_cnt; i++, insn++) {
18080 		if (insn->dst_reg >= MAX_BPF_REG &&
18081 		    !is_stack_arg_st(insn) && !is_stack_arg_stx(insn)) {
18082 			verbose(env, "R%d is invalid\n", insn->dst_reg);
18083 			return -EINVAL;
18084 		}
18085 		if (insn->src_reg >= MAX_BPF_REG && !is_stack_arg_ldx(insn)) {
18086 			verbose(env, "R%d is invalid\n", insn->src_reg);
18087 			return -EINVAL;
18088 		}
18089 		if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
18090 			struct bpf_insn_aux_data *aux;
18091 			struct bpf_map *map;
18092 			int map_idx;
18093 			u64 addr;
18094 			u32 fd;
18095 
18096 			if (i == insn_cnt - 1 || insn[1].code != 0 ||
18097 			    insn[1].dst_reg != 0 || insn[1].src_reg != 0 ||
18098 			    insn[1].off != 0) {
18099 				verbose(env, "invalid bpf_ld_imm64 insn\n");
18100 				return -EINVAL;
18101 			}
18102 
18103 			if (insn[0].off != 0) {
18104 				verbose(env, "BPF_LD_IMM64 uses reserved fields\n");
18105 				return -EINVAL;
18106 			}
18107 
18108 			if (insn[0].src_reg == 0)
18109 				/* valid generic load 64-bit imm */
18110 				goto next_insn;
18111 
18112 			if (insn[0].src_reg == BPF_PSEUDO_BTF_ID) {
18113 				aux = &env->insn_aux_data[i];
18114 				err = check_pseudo_btf_id(env, insn, aux);
18115 				if (err)
18116 					return err;
18117 				goto next_insn;
18118 			}
18119 
18120 			if (insn[0].src_reg == BPF_PSEUDO_FUNC) {
18121 				aux = &env->insn_aux_data[i];
18122 				aux->ptr_type = PTR_TO_FUNC;
18123 				goto next_insn;
18124 			}
18125 
18126 			/* In final convert_pseudo_ld_imm64() step, this is
18127 			 * converted into regular 64-bit imm load insn.
18128 			 */
18129 			switch (insn[0].src_reg) {
18130 			case BPF_PSEUDO_MAP_VALUE:
18131 			case BPF_PSEUDO_MAP_IDX_VALUE:
18132 				break;
18133 			case BPF_PSEUDO_MAP_FD:
18134 			case BPF_PSEUDO_MAP_IDX:
18135 				if (insn[1].imm == 0)
18136 					break;
18137 				fallthrough;
18138 			default:
18139 				verbose(env, "unrecognized bpf_ld_imm64 insn\n");
18140 				return -EINVAL;
18141 			}
18142 
18143 			switch (insn[0].src_reg) {
18144 			case BPF_PSEUDO_MAP_IDX_VALUE:
18145 			case BPF_PSEUDO_MAP_IDX:
18146 				if (bpfptr_is_null(env->fd_array)) {
18147 					verbose(env, "fd_idx without fd_array is invalid\n");
18148 					return -EPROTO;
18149 				}
18150 				if (copy_from_bpfptr_offset(&fd, env->fd_array,
18151 							    insn[0].imm * sizeof(fd),
18152 							    sizeof(fd)))
18153 					return -EFAULT;
18154 				break;
18155 			default:
18156 				fd = insn[0].imm;
18157 				break;
18158 			}
18159 
18160 			map_idx = add_used_map(env, fd);
18161 			if (map_idx < 0)
18162 				return map_idx;
18163 			map = env->used_maps[map_idx];
18164 
18165 			aux = &env->insn_aux_data[i];
18166 			aux->map_index = map_idx;
18167 
18168 			if (insn[0].src_reg == BPF_PSEUDO_MAP_FD ||
18169 			    insn[0].src_reg == BPF_PSEUDO_MAP_IDX) {
18170 				addr = (unsigned long)map;
18171 			} else {
18172 				u32 off = insn[1].imm;
18173 
18174 				if (!map->ops->map_direct_value_addr) {
18175 					verbose(env, "no direct value access support for this map type\n");
18176 					return -EINVAL;
18177 				}
18178 
18179 				err = map->ops->map_direct_value_addr(map, &addr, off);
18180 				if (err) {
18181 					verbose(env, "invalid access to map value pointer, value_size=%u off=%u\n",
18182 						map->value_size, off);
18183 					return err;
18184 				}
18185 
18186 				aux->map_off = off;
18187 				addr += off;
18188 			}
18189 
18190 			insn[0].imm = (u32)addr;
18191 			insn[1].imm = addr >> 32;
18192 
18193 next_insn:
18194 			insn++;
18195 			i++;
18196 			continue;
18197 		}
18198 
18199 		/* Basic sanity check before we invest more work here. */
18200 		if (!bpf_opcode_in_insntable(insn->code)) {
18201 			verbose(env, "unknown opcode %02x\n", insn->code);
18202 			return -EINVAL;
18203 		}
18204 
18205 		err = check_insn_fields(env, insn);
18206 		if (err)
18207 			return err;
18208 	}
18209 
18210 	/* now all pseudo BPF_LD_IMM64 instructions load valid
18211 	 * 'struct bpf_map *' into a register instead of user map_fd.
18212 	 * These pointers will be used later by verifier to validate map access.
18213 	 */
18214 	return 0;
18215 }
18216 
18217 /* drop refcnt of maps used by the rejected program */
18218 static void release_maps(struct bpf_verifier_env *env)
18219 {
18220 	__bpf_free_used_maps(env->prog->aux, env->used_maps,
18221 			     env->used_map_cnt);
18222 }
18223 
18224 /* drop refcnt of maps used by the rejected program */
18225 static void release_btfs(struct bpf_verifier_env *env)
18226 {
18227 	__bpf_free_used_btfs(env->used_btfs, env->used_btf_cnt);
18228 }
18229 
18230 /* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */
18231 static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
18232 {
18233 	struct bpf_insn *insn = env->prog->insnsi;
18234 	int insn_cnt = env->prog->len;
18235 	int i;
18236 
18237 	for (i = 0; i < insn_cnt; i++, insn++) {
18238 		if (insn->code != (BPF_LD | BPF_IMM | BPF_DW))
18239 			continue;
18240 		if (insn->src_reg == BPF_PSEUDO_FUNC)
18241 			continue;
18242 		insn->src_reg = 0;
18243 	}
18244 }
18245 
18246 static void release_insn_arrays(struct bpf_verifier_env *env)
18247 {
18248 	int i;
18249 
18250 	for (i = 0; i < env->insn_array_map_cnt; i++)
18251 		bpf_insn_array_release(env->insn_array_maps[i]);
18252 }
18253 
18254 
18255 
18256 /* The verifier does more data flow analysis than llvm and will not
18257  * explore branches that are dead at run time. Malicious programs can
18258  * have dead code too. Therefore replace all dead at-run-time code
18259  * with 'ja -1'.
18260  *
18261  * Just nops are not optimal, e.g. if they would sit at the end of the
18262  * program and through another bug we would manage to jump there, then
18263  * we'd execute beyond program memory otherwise. Returning exception
18264  * code also wouldn't work since we can have subprogs where the dead
18265  * code could be located.
18266  */
18267 static void sanitize_dead_code(struct bpf_verifier_env *env)
18268 {
18269 	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
18270 	struct bpf_insn trap = BPF_JMP_IMM(BPF_JA, 0, 0, -1);
18271 	struct bpf_insn *insn = env->prog->insnsi;
18272 	const int insn_cnt = env->prog->len;
18273 	int i;
18274 
18275 	for (i = 0; i < insn_cnt; i++) {
18276 		if (aux_data[i].seen)
18277 			continue;
18278 		memcpy(insn + i, &trap, sizeof(trap));
18279 		aux_data[i].zext_dst = false;
18280 	}
18281 }
18282 
18283 
18284 
18285 static void free_states(struct bpf_verifier_env *env)
18286 {
18287 	struct bpf_verifier_state_list *sl;
18288 	struct list_head *head, *pos, *tmp;
18289 	struct bpf_scc_info *info;
18290 	int i, j;
18291 
18292 	bpf_free_verifier_state(env->cur_state, true);
18293 	env->cur_state = NULL;
18294 	while (!pop_stack(env, NULL, NULL, false));
18295 
18296 	list_for_each_safe(pos, tmp, &env->free_list) {
18297 		sl = container_of(pos, struct bpf_verifier_state_list, node);
18298 		bpf_free_verifier_state(&sl->state, false);
18299 		kfree(sl);
18300 	}
18301 	INIT_LIST_HEAD(&env->free_list);
18302 
18303 	for (i = 0; i < env->scc_cnt; ++i) {
18304 		info = env->scc_info[i];
18305 		if (!info)
18306 			continue;
18307 		for (j = 0; j < info->num_visits; j++)
18308 			bpf_free_backedges(&info->visits[j]);
18309 		kvfree(info);
18310 		env->scc_info[i] = NULL;
18311 	}
18312 
18313 	if (!env->explored_states)
18314 		return;
18315 
18316 	for (i = 0; i < state_htab_size(env); i++) {
18317 		head = &env->explored_states[i];
18318 
18319 		list_for_each_safe(pos, tmp, head) {
18320 			sl = container_of(pos, struct bpf_verifier_state_list, node);
18321 			bpf_free_verifier_state(&sl->state, false);
18322 			kfree(sl);
18323 		}
18324 		INIT_LIST_HEAD(&env->explored_states[i]);
18325 	}
18326 }
18327 
18328 static int do_check_common(struct bpf_verifier_env *env, int subprog)
18329 {
18330 	bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
18331 	struct bpf_subprog_info *sub = subprog_info(env, subprog);
18332 	struct bpf_prog_aux *aux = env->prog->aux;
18333 	struct bpf_verifier_state *state;
18334 	struct bpf_reg_state *regs;
18335 	int ret, i;
18336 
18337 	env->prev_linfo = NULL;
18338 	env->pass_cnt++;
18339 
18340 	state = kzalloc_obj(struct bpf_verifier_state, GFP_KERNEL_ACCOUNT);
18341 	if (!state)
18342 		return -ENOMEM;
18343 	state->curframe = 0;
18344 	state->speculative = false;
18345 	state->branches = 1;
18346 	state->in_sleepable = env->prog->sleepable;
18347 	state->frame[0] = kzalloc_obj(struct bpf_func_state, GFP_KERNEL_ACCOUNT);
18348 	if (!state->frame[0]) {
18349 		kfree(state);
18350 		return -ENOMEM;
18351 	}
18352 	env->cur_state = state;
18353 	init_func_state(env, state->frame[0],
18354 			BPF_MAIN_FUNC /* callsite */,
18355 			0 /* frameno */,
18356 			subprog);
18357 	state->first_insn_idx = env->subprog_info[subprog].start;
18358 	state->last_insn_idx = -1;
18359 
18360 	regs = state->frame[state->curframe]->regs;
18361 	if (subprog || env->prog->type == BPF_PROG_TYPE_EXT) {
18362 		const char *sub_name = subprog_name(env, subprog);
18363 		struct bpf_subprog_arg_info *arg;
18364 		struct bpf_reg_state *reg;
18365 
18366 		if (env->log.level & BPF_LOG_LEVEL)
18367 			verbose(env, "Validating %s() func#%d...\n", sub_name, subprog);
18368 		ret = btf_prepare_func_args(env, subprog);
18369 		if (ret)
18370 			goto out;
18371 
18372 		if (subprog_is_exc_cb(env, subprog)) {
18373 			state->frame[0]->in_exception_callback_fn = true;
18374 
18375 			/*
18376 			 * Global functions are scalar or void, make sure
18377 			 * we return a scalar.
18378 			 */
18379 			if (subprog_returns_void(env, subprog)) {
18380 				verbose(env, "exception cb cannot return void\n");
18381 				ret = -EINVAL;
18382 				goto out;
18383 			}
18384 
18385 			/* Also ensure the callback only has a single scalar argument. */
18386 			if (sub->arg_cnt != 1 || sub->args[0].arg_type != ARG_ANYTHING) {
18387 				verbose(env, "exception cb only supports single integer argument\n");
18388 				ret = -EINVAL;
18389 				goto out;
18390 			}
18391 		}
18392 		for (i = BPF_REG_1; i <= min_t(u32, sub->arg_cnt, MAX_BPF_FUNC_REG_ARGS); i++) {
18393 			arg = &sub->args[i - BPF_REG_1];
18394 			reg = &regs[i];
18395 
18396 			if (arg->arg_type == ARG_PTR_TO_CTX) {
18397 				reg->type = PTR_TO_CTX;
18398 				mark_reg_known_zero(env, regs, i);
18399 			} else if (arg->arg_type == ARG_ANYTHING) {
18400 				reg->type = SCALAR_VALUE;
18401 				mark_reg_unknown(env, regs, i);
18402 			} else if (arg->arg_type == ARG_PTR_TO_DYNPTR) {
18403 				/* assume unspecial LOCAL dynptr type */
18404 				__mark_dynptr_reg(reg, BPF_DYNPTR_TYPE_LOCAL, true, ++env->id_gen, 0);
18405 			} else if (base_type(arg->arg_type) == ARG_PTR_TO_MEM) {
18406 				reg->type = PTR_TO_MEM;
18407 				reg->type |= arg->arg_type &
18408 					     (PTR_MAYBE_NULL | PTR_UNTRUSTED | MEM_RDONLY);
18409 				mark_reg_known_zero(env, regs, i);
18410 				reg->mem_size = arg->mem_size;
18411 				if (arg->arg_type & PTR_MAYBE_NULL)
18412 					reg->id = ++env->id_gen;
18413 			} else if (base_type(arg->arg_type) == ARG_PTR_TO_BTF_ID) {
18414 				reg->type = PTR_TO_BTF_ID;
18415 				if (arg->arg_type & PTR_MAYBE_NULL)
18416 					reg->type |= PTR_MAYBE_NULL;
18417 				if (arg->arg_type & PTR_UNTRUSTED)
18418 					reg->type |= PTR_UNTRUSTED;
18419 				if (arg->arg_type & PTR_TRUSTED)
18420 					reg->type |= PTR_TRUSTED;
18421 				mark_reg_known_zero(env, regs, i);
18422 				reg->btf = bpf_get_btf_vmlinux(); /* can't fail at this point */
18423 				reg->btf_id = arg->btf_id;
18424 				reg->id = ++env->id_gen;
18425 			} else if (base_type(arg->arg_type) == ARG_PTR_TO_ARENA) {
18426 				/* caller can pass either PTR_TO_ARENA or SCALAR */
18427 				mark_reg_unknown(env, regs, i);
18428 			} else {
18429 				verifier_bug(env, "unhandled arg#%d type %d",
18430 					     i - BPF_REG_1 + 1, arg->arg_type);
18431 				ret = -EFAULT;
18432 				goto out;
18433 			}
18434 		}
18435 		if (env->prog->type == BPF_PROG_TYPE_EXT && sub->arg_cnt > MAX_BPF_FUNC_REG_ARGS) {
18436 			verbose(env, "freplace programs with >%d args not supported yet\n",
18437 				MAX_BPF_FUNC_REG_ARGS);
18438 			ret = -EINVAL;
18439 			goto out;
18440 		}
18441 	} else {
18442 		/* if main BPF program has associated BTF info, validate that
18443 		 * it's matching expected signature, and otherwise mark BTF
18444 		 * info for main program as unreliable
18445 		 */
18446 		if (env->prog->aux->func_info_aux) {
18447 			ret = btf_prepare_func_args(env, 0);
18448 			if (ret || sub->arg_cnt != 1 || sub->args[0].arg_type != ARG_PTR_TO_CTX) {
18449 				env->prog->aux->func_info_aux[0].unreliable = true;
18450 				sub->arg_cnt = 1;
18451 				sub->stack_arg_cnt = 0;
18452 			}
18453 		}
18454 
18455 		/* 1st arg to a function */
18456 		regs[BPF_REG_1].type = PTR_TO_CTX;
18457 		mark_reg_known_zero(env, regs, BPF_REG_1);
18458 	}
18459 
18460 	/* Acquire references for struct_ops program arguments tagged with "__ref" */
18461 	if (!subprog && env->prog->type == BPF_PROG_TYPE_STRUCT_OPS) {
18462 		for (i = 0; i < aux->ctx_arg_info_size; i++) {
18463 			ret = aux->ctx_arg_info[i].refcounted ? acquire_reference(env, 0, 0) : 0;
18464 			if (ret < 0)
18465 				goto out;
18466 
18467 			aux->ctx_arg_info[i].ref_id = ret;
18468 		}
18469 	}
18470 
18471 	ret = do_check(env);
18472 out:
18473 	if (!ret && pop_log)
18474 		bpf_vlog_reset(&env->log, 0);
18475 	free_states(env);
18476 	return ret;
18477 }
18478 
18479 /* Lazily verify all global functions based on their BTF, if they are called
18480  * from main BPF program or any of subprograms transitively.
18481  * BPF global subprogs called from dead code are not validated.
18482  * All callable global functions must pass verification.
18483  * Otherwise the whole program is rejected.
18484  * Consider:
18485  * int bar(int);
18486  * int foo(int f)
18487  * {
18488  *    return bar(f);
18489  * }
18490  * int bar(int b)
18491  * {
18492  *    ...
18493  * }
18494  * foo() will be verified first for R1=any_scalar_value. During verification it
18495  * will be assumed that bar() already verified successfully and call to bar()
18496  * from foo() will be checked for type match only. Later bar() will be verified
18497  * independently to check that it's safe for R1=any_scalar_value.
18498  */
18499 static int do_check_subprogs(struct bpf_verifier_env *env)
18500 {
18501 	struct bpf_prog_aux *aux = env->prog->aux;
18502 	struct bpf_func_info_aux *sub_aux;
18503 	int i, ret, new_cnt;
18504 	u32 insn_processed;
18505 
18506 	if (!aux->func_info)
18507 		return 0;
18508 
18509 	/* exception callback is presumed to be always called */
18510 	if (env->exception_callback_subprog)
18511 		subprog_aux(env, env->exception_callback_subprog)->called = true;
18512 
18513 again:
18514 	new_cnt = 0;
18515 	for (i = 1; i < env->subprog_cnt; i++) {
18516 		if (!bpf_subprog_is_global(env, i))
18517 			continue;
18518 
18519 		insn_processed = env->insn_processed;
18520 
18521 		sub_aux = subprog_aux(env, i);
18522 		if (!sub_aux->called || sub_aux->verified)
18523 			continue;
18524 
18525 		env->insn_idx = env->subprog_info[i].start;
18526 		WARN_ON_ONCE(env->insn_idx == 0);
18527 		ret = do_check_common(env, i);
18528 		env->subprog_info[i].insn_processed = env->insn_processed - insn_processed;
18529 		if (ret) {
18530 			return ret;
18531 		} else if (env->log.level & BPF_LOG_LEVEL) {
18532 			verbose(env, "Func#%d ('%s') is safe for any args that match its prototype\n",
18533 				i, subprog_name(env, i));
18534 		}
18535 
18536 		/* We verified new global subprog, it might have called some
18537 		 * more global subprogs that we haven't verified yet, so we
18538 		 * need to do another pass over subprogs to verify those.
18539 		 */
18540 		sub_aux->verified = true;
18541 		new_cnt++;
18542 	}
18543 
18544 	/* We can't loop forever as we verify at least one global subprog on
18545 	 * each pass.
18546 	 */
18547 	if (new_cnt)
18548 		goto again;
18549 
18550 	return 0;
18551 }
18552 
18553 static int do_check_main(struct bpf_verifier_env *env)
18554 {
18555 	u32 insn_processed = env->insn_processed;
18556 	int ret;
18557 
18558 	env->insn_idx = 0;
18559 	ret = do_check_common(env, 0);
18560 	env->subprog_info[0].insn_processed = env->insn_processed - insn_processed;
18561 	if (!ret)
18562 		env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
18563 	return ret;
18564 }
18565 
18566 
18567 static void print_verification_stats(struct bpf_verifier_env *env)
18568 {
18569 	/* Skip over hidden subprogs which are not verified. */
18570 	int i, subprog_cnt = env->subprog_cnt - env->hidden_subprog_cnt;
18571 
18572 	if (env->log.level & BPF_LOG_STATS) {
18573 		verbose(env, "verification time %lld usec\n",
18574 			div_u64(env->verification_time, 1000));
18575 		verbose(env, "stack depth %d", env->subprog_info[0].stack_depth);
18576 		for (i = 1; i < subprog_cnt; i++)
18577 			verbose(env, "+%d", env->subprog_info[i].stack_depth);
18578 		verbose(env, " max %d\n", env->max_stack_depth);
18579 		verbose(env, "insns processed %d", env->subprog_info[0].insn_processed);
18580 		for (i = 1; i < subprog_cnt; i++)
18581 			if (bpf_subprog_is_global(env, i))
18582 				verbose(env, "+%d", env->subprog_info[i].insn_processed);
18583 		verbose(env, "\n");
18584 	}
18585 	verbose(env, "processed %d insns (limit %d) max_states_per_insn %d "
18586 		"total_states %d peak_states %d mark_read %d\n",
18587 		env->insn_processed, BPF_COMPLEXITY_LIMIT_INSNS,
18588 		env->max_states_per_insn, env->total_states,
18589 		env->peak_states, env->longest_mark_read_walk);
18590 }
18591 
18592 int bpf_prog_ctx_arg_info_init(struct bpf_prog *prog,
18593 			       const struct bpf_ctx_arg_aux *info, u32 cnt)
18594 {
18595 	prog->aux->ctx_arg_info = kmemdup_array(info, cnt, sizeof(*info), GFP_KERNEL_ACCOUNT);
18596 	prog->aux->ctx_arg_info_size = cnt;
18597 
18598 	return prog->aux->ctx_arg_info ? 0 : -ENOMEM;
18599 }
18600 
18601 static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
18602 {
18603 	const struct btf_type *t, *func_proto;
18604 	const struct bpf_struct_ops_desc *st_ops_desc;
18605 	const struct bpf_struct_ops *st_ops;
18606 	const struct btf_member *member;
18607 	struct bpf_prog *prog = env->prog;
18608 	bool has_refcounted_arg = false;
18609 	u32 btf_id, member_idx, member_off;
18610 	struct btf *btf;
18611 	const char *mname;
18612 	int i, err;
18613 
18614 	if (!prog->gpl_compatible) {
18615 		verbose(env, "struct ops programs must have a GPL compatible license\n");
18616 		return -EINVAL;
18617 	}
18618 
18619 	if (!prog->aux->attach_btf_id)
18620 		return -ENOTSUPP;
18621 
18622 	btf = prog->aux->attach_btf;
18623 	if (btf_is_module(btf)) {
18624 		/* Make sure st_ops is valid through the lifetime of env */
18625 		env->attach_btf_mod = btf_try_get_module(btf);
18626 		if (!env->attach_btf_mod) {
18627 			verbose(env, "struct_ops module %s is not found\n",
18628 				btf_get_name(btf));
18629 			return -ENOTSUPP;
18630 		}
18631 	}
18632 
18633 	btf_id = prog->aux->attach_btf_id;
18634 	st_ops_desc = bpf_struct_ops_find(btf, btf_id);
18635 	if (!st_ops_desc) {
18636 		verbose(env, "attach_btf_id %u is not a supported struct\n",
18637 			btf_id);
18638 		return -ENOTSUPP;
18639 	}
18640 	st_ops = st_ops_desc->st_ops;
18641 
18642 	t = st_ops_desc->type;
18643 	member_idx = prog->expected_attach_type;
18644 	if (member_idx >= btf_type_vlen(t)) {
18645 		verbose(env, "attach to invalid member idx %u of struct %s\n",
18646 			member_idx, st_ops->name);
18647 		return -EINVAL;
18648 	}
18649 
18650 	member = &btf_type_member(t)[member_idx];
18651 	mname = btf_name_by_offset(btf, member->name_off);
18652 	func_proto = btf_type_resolve_func_ptr(btf, member->type,
18653 					       NULL);
18654 	if (!func_proto) {
18655 		verbose(env, "attach to invalid member %s(@idx %u) of struct %s\n",
18656 			mname, member_idx, st_ops->name);
18657 		return -EINVAL;
18658 	}
18659 
18660 	member_off = __btf_member_bit_offset(t, member) / 8;
18661 	err = bpf_struct_ops_supported(st_ops, member_off);
18662 	if (err) {
18663 		verbose(env, "attach to unsupported member %s of struct %s\n",
18664 			mname, st_ops->name);
18665 		return err;
18666 	}
18667 
18668 	if (st_ops->check_member) {
18669 		err = st_ops->check_member(t, member, prog);
18670 
18671 		if (err) {
18672 			verbose(env, "attach to unsupported member %s of struct %s\n",
18673 				mname, st_ops->name);
18674 			return err;
18675 		}
18676 	}
18677 
18678 	if (prog->aux->priv_stack_requested && !bpf_jit_supports_private_stack()) {
18679 		verbose(env, "Private stack not supported by jit\n");
18680 		return -EACCES;
18681 	}
18682 
18683 	for (i = 0; i < st_ops_desc->arg_info[member_idx].cnt; i++) {
18684 		if (st_ops_desc->arg_info[member_idx].info[i].refcounted) {
18685 			has_refcounted_arg = true;
18686 			break;
18687 		}
18688 	}
18689 
18690 	/* Tail call is not allowed for programs with refcounted arguments since we
18691 	 * cannot guarantee that valid refcounted kptrs will be passed to the callee.
18692 	 */
18693 	for (i = 0; i < env->subprog_cnt; i++) {
18694 		if (has_refcounted_arg && env->subprog_info[i].has_tail_call) {
18695 			verbose(env, "program with __ref argument cannot tail call\n");
18696 			return -EINVAL;
18697 		}
18698 	}
18699 
18700 	prog->aux->st_ops = st_ops;
18701 	prog->aux->attach_st_ops_member_off = member_off;
18702 
18703 	prog->aux->attach_func_proto = func_proto;
18704 	prog->aux->attach_func_name = mname;
18705 	env->ops = st_ops->verifier_ops;
18706 
18707 	return bpf_prog_ctx_arg_info_init(prog, st_ops_desc->arg_info[member_idx].info,
18708 					  st_ops_desc->arg_info[member_idx].cnt);
18709 }
18710 #define SECURITY_PREFIX "security_"
18711 
18712 #ifdef CONFIG_FUNCTION_ERROR_INJECTION
18713 
/* Set of BTF ids of functions that are on the ALLOW_ERROR_INJECTION list but
 * must nevertheless be treated as non-sleepable attach targets.
 */
BTF_SET_START(btf_non_sleepable_error_inject)
/* Three functions below can be called from sleepable and non-sleepable context.
 * Assume non-sleepable from bpf safety point of view.
 * Two of them are listed only when their config option is enabled.
 */
BTF_ID(func, __filemap_add_folio)
#ifdef CONFIG_FAIL_PAGE_ALLOC
BTF_ID(func, should_fail_alloc_page)
#endif
#ifdef CONFIG_FAILSLAB
BTF_ID(func, should_failslab)
#endif
BTF_SET_END(btf_non_sleepable_error_inject)
18729 
18730 static int check_non_sleepable_error_inject(u32 btf_id)
18731 {
18732 	return btf_id_set_contains(&btf_non_sleepable_error_inject, btf_id);
18733 }
18734 
18735 static int check_attach_sleepable(u32 btf_id, unsigned long addr, const char *func_name)
18736 {
18737 	/* fentry/fexit/fmod_ret progs can be sleepable if they are
18738 	 * attached to ALLOW_ERROR_INJECTION and are not in denylist.
18739 	 */
18740 	if (!check_non_sleepable_error_inject(btf_id) &&
18741 	    within_error_injection_list(addr))
18742 		return 0;
18743 
18744 	return -EINVAL;
18745 }
18746 
18747 static int check_attach_modify_return(unsigned long addr, const char *func_name)
18748 {
18749 	if (within_error_injection_list(addr) ||
18750 	    !strncmp(SECURITY_PREFIX, func_name, sizeof(SECURITY_PREFIX) - 1))
18751 		return 0;
18752 
18753 	return -EINVAL;
18754 }
18755 
18756 #else
18757 
/* The arch-specific syscall symbol prefixes are hard-coded in the arch
 * syscall code, so they have to be hard-coded here as well. Ftrace offers
 * arch_syscall_match_sym_name(), but that only compares two concrete
 * function names.
 *
 * Returns true when @func_name starts with the prefix of the architecture
 * this file is built for, and false on architectures without a known prefix.
 */
static bool has_arch_syscall_prefix(const char *func_name)
{
	const char *prefix;

#if defined(__x86_64__)
	prefix = "__x64_";
#elif defined(__i386__)
	prefix = "__ia32_";
#elif defined(__s390x__)
	prefix = "__s390x_";
#elif defined(__aarch64__)
	prefix = "__arm64_";
#elif defined(__riscv)
	prefix = "__riscv_";
#elif defined(__powerpc__) || defined(__powerpc64__) || defined(__loongarch__)
	prefix = "sys_";
#else
	prefix = NULL;
#endif

	if (!prefix)
		return false;

	/* compare exactly as many bytes as the prefix is long */
	return strncmp(func_name, prefix, strlen(prefix)) == 0;
}
18782 
18783 /* Without error injection, allow sleepable and fmod_ret progs on syscalls. */
18784 
18785 static int check_attach_sleepable(u32 btf_id, unsigned long addr, const char *func_name)
18786 {
18787 	if (has_arch_syscall_prefix(func_name))
18788 		return 0;
18789 
18790 	return -EINVAL;
18791 }
18792 
18793 static int check_attach_modify_return(unsigned long addr, const char *func_name)
18794 {
18795 	if (has_arch_syscall_prefix(func_name) ||
18796 	    !strncmp(SECURITY_PREFIX, func_name, sizeof(SECURITY_PREFIX) - 1))
18797 		return 0;
18798 
18799 	return -EINVAL;
18800 }
18801 
18802 #endif /* CONFIG_FUNCTION_ERROR_INJECTION */
18803 
18804 static bool is_tracing_multi_id(const struct bpf_prog *prog, u32 btf_id)
18805 {
18806 	return is_tracing_multi(prog->expected_attach_type) && bpf_multi_func_btf_id[0] == btf_id;
18807 }
18808 
18809 static int btf_id_allow_sleepable(u32 btf_id, unsigned long addr, const struct bpf_prog *prog,
18810 				  const struct btf *btf)
18811 {
18812 	const struct btf_type *t;
18813 	const char *tname;
18814 
18815 	switch (prog->type) {
18816 	case BPF_PROG_TYPE_TRACING:
18817 		t = btf_type_by_id(btf, btf_id);
18818 		if (!t)
18819 			return -EINVAL;
18820 		tname = btf_name_by_offset(btf, t->name_off);
18821 		if (!tname)
18822 			return -EINVAL;
18823 
18824 		/*
18825 		 * *.multi sleepable programs will pass initial sleepable check,
18826 		 * the actual attached btf ids are checked later during the link
18827 		 * attachment.
18828 		 */
18829 		if (is_tracing_multi_id(prog, btf_id))
18830 			return 0;
18831 		if (!check_attach_sleepable(btf_id, addr, tname))
18832 			return 0;
18833 		/*
18834 		 * fentry/fexit/fmod_ret progs can also be sleepable if they are
18835 		 * in the fmodret id set with the KF_SLEEPABLE flag.
18836 		 */
18837 		else {
18838 			u32 *flags = btf_kfunc_is_modify_return(btf, btf_id, prog);
18839 
18840 			if (flags && (*flags & KF_SLEEPABLE))
18841 				return 0;
18842 		}
18843 		break;
18844 	case BPF_PROG_TYPE_LSM:
18845 		/*
18846 		 * LSM progs check that they are attached to bpf_lsm_*() funcs.
18847 		 * Only some of them are sleepable.
18848 		 */
18849 		if (bpf_lsm_is_sleepable_hook(btf_id))
18850 			return 0;
18851 		break;
18852 	default:
18853 		break;
18854 	}
18855 	return -EINVAL;
18856 }
18857 
18858 int bpf_check_attach_target(struct bpf_verifier_log *log,
18859 			    const struct bpf_prog *prog,
18860 			    const struct bpf_prog *tgt_prog,
18861 			    u32 btf_id,
18862 			    struct bpf_attach_target_info *tgt_info)
18863 {
18864 	bool prog_extension = prog->type == BPF_PROG_TYPE_EXT;
18865 	bool prog_tracing = prog->type == BPF_PROG_TYPE_TRACING;
18866 	char trace_symbol[KSYM_SYMBOL_LEN];
18867 	const char prefix[] = "btf_trace_";
18868 	struct bpf_raw_event_map *btp;
18869 	int ret = 0, subprog = -1, i;
18870 	const struct btf_type *t;
18871 	bool conservative = true;
18872 	const char *tname, *fname;
18873 	struct btf *btf;
18874 	long addr = 0;
18875 	struct module *mod = NULL;
18876 
18877 	if (!btf_id) {
18878 		bpf_log(log, "Tracing programs must provide btf_id\n");
18879 		return -EINVAL;
18880 	}
18881 	btf = tgt_prog ? tgt_prog->aux->btf : prog->aux->attach_btf;
18882 	if (!btf) {
18883 		bpf_log(log,
18884 			"Tracing program can only be attached to another program annotated with BTF\n");
18885 		return -EINVAL;
18886 	}
18887 	t = btf_type_by_id(btf, btf_id);
18888 	if (!t) {
18889 		bpf_log(log, "attach_btf_id %u is invalid\n", btf_id);
18890 		return -EINVAL;
18891 	}
18892 	tname = btf_name_by_offset(btf, t->name_off);
18893 	if (!tname) {
18894 		bpf_log(log, "attach_btf_id %u doesn't have a name\n", btf_id);
18895 		return -EINVAL;
18896 	}
18897 	if (tgt_prog) {
18898 		struct bpf_prog_aux *aux = tgt_prog->aux;
18899 		bool tgt_changes_pkt_data;
18900 		bool tgt_might_sleep;
18901 
18902 		if (bpf_prog_is_dev_bound(prog->aux) &&
18903 		    !bpf_prog_dev_bound_match(prog, tgt_prog)) {
18904 			bpf_log(log, "Target program bound device mismatch");
18905 			return -EINVAL;
18906 		}
18907 
18908 		for (i = 0; i < aux->func_info_cnt; i++)
18909 			if (aux->func_info[i].type_id == btf_id) {
18910 				subprog = i;
18911 				break;
18912 			}
18913 		if (subprog == -1) {
18914 			bpf_log(log, "Subprog %s doesn't exist\n", tname);
18915 			return -EINVAL;
18916 		}
18917 		if (aux->func && aux->func[subprog]->aux->exception_cb) {
18918 			bpf_log(log,
18919 				"%s programs cannot attach to exception callback\n",
18920 				prog_extension ? "Extension" : "Tracing");
18921 			return -EINVAL;
18922 		}
18923 		conservative = aux->func_info_aux[subprog].unreliable;
18924 		if (prog_extension) {
18925 			if (conservative) {
18926 				bpf_log(log,
18927 					"Cannot replace static functions\n");
18928 				return -EINVAL;
18929 			}
18930 			if (!prog->jit_requested) {
18931 				bpf_log(log,
18932 					"Extension programs should be JITed\n");
18933 				return -EINVAL;
18934 			}
18935 			tgt_changes_pkt_data = aux->func
18936 					       ? aux->func[subprog]->aux->changes_pkt_data
18937 					       : aux->changes_pkt_data;
18938 			if (prog->aux->changes_pkt_data && !tgt_changes_pkt_data) {
18939 				bpf_log(log,
18940 					"Extension program changes packet data, while original does not\n");
18941 				return -EINVAL;
18942 			}
18943 
18944 			tgt_might_sleep = aux->func
18945 					  ? aux->func[subprog]->aux->might_sleep
18946 					  : aux->might_sleep;
18947 			if (prog->aux->might_sleep && !tgt_might_sleep) {
18948 				bpf_log(log,
18949 					"Extension program may sleep, while original does not\n");
18950 				return -EINVAL;
18951 			}
18952 		}
18953 		if (!tgt_prog->jited) {
18954 			bpf_log(log, "Can attach to only JITed progs\n");
18955 			return -EINVAL;
18956 		}
18957 		if (prog_tracing) {
18958 			if (aux->attach_tracing_prog) {
18959 				/*
18960 				 * Target program is an fentry/fexit which is already attached
18961 				 * to another tracing program. More levels of nesting
18962 				 * attachment are not allowed.
18963 				 */
18964 				bpf_log(log, "Cannot nest tracing program attach more than once\n");
18965 				return -EINVAL;
18966 			}
18967 		} else if (tgt_prog->type == prog->type) {
18968 			/*
18969 			 * To avoid potential call chain cycles, prevent attaching of a
18970 			 * program extension to another extension. It's ok to attach
18971 			 * fentry/fexit to extension program.
18972 			 */
18973 			bpf_log(log, "Cannot recursively attach\n");
18974 			return -EINVAL;
18975 		}
18976 		if (tgt_prog->type == BPF_PROG_TYPE_TRACING &&
18977 		    prog_extension &&
18978 		    (tgt_prog->expected_attach_type == BPF_TRACE_FENTRY ||
18979 		     tgt_prog->expected_attach_type == BPF_TRACE_FEXIT ||
18980 		     tgt_prog->expected_attach_type == BPF_TRACE_FENTRY_MULTI ||
18981 		     tgt_prog->expected_attach_type == BPF_TRACE_FEXIT_MULTI ||
18982 		     tgt_prog->expected_attach_type == BPF_TRACE_FSESSION ||
18983 		     tgt_prog->expected_attach_type == BPF_TRACE_FSESSION_MULTI)) {
18984 			/* Program extensions can extend all program types
18985 			 * except fentry/fexit. The reason is the following.
18986 			 * The fentry/fexit programs are used for performance
18987 			 * analysis, stats and can be attached to any program
18988 			 * type. When extension program is replacing XDP function
18989 			 * it is necessary to allow performance analysis of all
18990 			 * functions. Both original XDP program and its program
18991 			 * extension. Hence attaching fentry/fexit to
18992 			 * BPF_PROG_TYPE_EXT is allowed. If extending of
18993 			 * fentry/fexit was allowed it would be possible to create
18994 			 * long call chain fentry->extension->fentry->extension
18995 			 * beyond reasonable stack size. Hence extending fentry
18996 			 * is not allowed.
18997 			 */
18998 			bpf_log(log, "Cannot extend fentry/fexit/fsession\n");
18999 			return -EINVAL;
19000 		}
19001 	} else {
19002 		if (prog_extension) {
19003 			bpf_log(log, "Cannot replace kernel functions\n");
19004 			return -EINVAL;
19005 		}
19006 	}
19007 
19008 	switch (prog->expected_attach_type) {
19009 	case BPF_TRACE_RAW_TP:
19010 		if (tgt_prog) {
19011 			bpf_log(log,
19012 				"Only FENTRY/FEXIT/FSESSION progs are attachable to another BPF prog\n");
19013 			return -EINVAL;
19014 		}
19015 		if (!btf_type_is_typedef(t)) {
19016 			bpf_log(log, "attach_btf_id %u is not a typedef\n",
19017 				btf_id);
19018 			return -EINVAL;
19019 		}
19020 		if (strncmp(prefix, tname, sizeof(prefix) - 1)) {
19021 			bpf_log(log, "attach_btf_id %u points to wrong type name %s\n",
19022 				btf_id, tname);
19023 			return -EINVAL;
19024 		}
19025 		tname += sizeof(prefix) - 1;
19026 
19027 		/* The func_proto of "btf_trace_##tname" is generated from typedef without argument
19028 		 * names. Thus using bpf_raw_event_map to get argument names.
19029 		 */
19030 		btp = bpf_get_raw_tracepoint(tname);
19031 		if (!btp)
19032 			return -EINVAL;
19033 		if (prog->sleepable && !tracepoint_is_faultable(btp->tp)) {
19034 			bpf_log(log, "Sleepable program cannot attach to non-faultable tracepoint %s\n",
19035 				tname);
19036 			bpf_put_raw_tracepoint(btp);
19037 			return -EINVAL;
19038 		}
19039 		fname = kallsyms_lookup((unsigned long)btp->bpf_func, NULL, NULL, NULL,
19040 					trace_symbol);
19041 		bpf_put_raw_tracepoint(btp);
19042 
19043 		if (fname)
19044 			ret = btf_find_by_name_kind(btf, fname, BTF_KIND_FUNC);
19045 
19046 		if (!fname || ret < 0) {
19047 			bpf_log(log, "Cannot find btf of tracepoint template, fall back to %s%s.\n",
19048 				prefix, tname);
19049 			t = btf_type_by_id(btf, t->type);
19050 			if (!btf_type_is_ptr(t))
19051 				/* should never happen in valid vmlinux build */
19052 				return -EINVAL;
19053 		} else {
19054 			t = btf_type_by_id(btf, ret);
19055 			if (!btf_type_is_func(t))
19056 				/* should never happen in valid vmlinux build */
19057 				return -EINVAL;
19058 		}
19059 
19060 		t = btf_type_by_id(btf, t->type);
19061 		if (!btf_type_is_func_proto(t))
19062 			/* should never happen in valid vmlinux build */
19063 			return -EINVAL;
19064 
19065 		break;
19066 	case BPF_TRACE_ITER:
19067 		if (!btf_type_is_func(t)) {
19068 			bpf_log(log, "attach_btf_id %u is not a function\n",
19069 				btf_id);
19070 			return -EINVAL;
19071 		}
19072 		t = btf_type_by_id(btf, t->type);
19073 		if (!btf_type_is_func_proto(t))
19074 			return -EINVAL;
19075 		ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel);
19076 		if (ret)
19077 			return ret;
19078 		break;
19079 	default:
19080 		if (!prog_extension)
19081 			return -EINVAL;
19082 		fallthrough;
19083 	case BPF_MODIFY_RETURN:
19084 	case BPF_LSM_MAC:
19085 	case BPF_LSM_CGROUP:
19086 	case BPF_TRACE_FENTRY:
19087 	case BPF_TRACE_FEXIT:
19088 	case BPF_TRACE_FSESSION:
19089 	case BPF_TRACE_FSESSION_MULTI:
19090 	case BPF_TRACE_FENTRY_MULTI:
19091 	case BPF_TRACE_FEXIT_MULTI:
19092 		if ((prog->expected_attach_type == BPF_TRACE_FSESSION ||
19093 		    prog->expected_attach_type == BPF_TRACE_FSESSION_MULTI) &&
19094 		    !bpf_jit_supports_fsession()) {
19095 			bpf_log(log, "JIT does not support fsession\n");
19096 			return -EOPNOTSUPP;
19097 		}
19098 		if (!btf_type_is_func(t)) {
19099 			bpf_log(log, "attach_btf_id %u is not a function\n",
19100 				btf_id);
19101 			return -EINVAL;
19102 		}
19103 		if (prog_extension &&
19104 		    btf_check_type_match(log, prog, btf, t))
19105 			return -EINVAL;
19106 		t = btf_type_by_id(btf, t->type);
19107 		if (!btf_type_is_func_proto(t))
19108 			return -EINVAL;
19109 
19110 		if ((prog->aux->saved_dst_prog_type || prog->aux->saved_dst_attach_type) &&
19111 		    (!tgt_prog || prog->aux->saved_dst_prog_type != tgt_prog->type ||
19112 		     prog->aux->saved_dst_attach_type != tgt_prog->expected_attach_type))
19113 			return -EINVAL;
19114 
19115 		if (tgt_prog && conservative)
19116 			t = NULL;
19117 
19118 		ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel);
19119 		if (ret < 0)
19120 			return ret;
19121 
19122 		/*
19123 		 * *.multi programs don't need an address during program
19124 		 * verification, we just take the module ref if needed.
19125 		 */
19126 		if (is_tracing_multi_id(prog, btf_id)) {
19127 			if (btf_is_module(btf)) {
19128 				mod = btf_try_get_module(btf);
19129 				if (!mod)
19130 					return -ENOENT;
19131 			}
19132 			addr = 0;
19133 		} else if (tgt_prog) {
19134 			if (subprog == 0)
19135 				addr = (long) tgt_prog->bpf_func;
19136 			else
19137 				addr = (long) tgt_prog->aux->func[subprog]->bpf_func;
19138 		} else {
19139 			if (btf_is_module(btf)) {
19140 				mod = btf_try_get_module(btf);
19141 				if (mod)
19142 					addr = find_kallsyms_symbol_value(mod, tname);
19143 				else
19144 					addr = 0;
19145 			} else {
19146 				addr = kallsyms_lookup_name(tname);
19147 			}
19148 			if (!addr) {
19149 				module_put(mod);
19150 				bpf_log(log,
19151 					"The address of function %s cannot be found\n",
19152 					tname);
19153 				return -ENOENT;
19154 			}
19155 		}
19156 
19157 		if (prog->sleepable) {
19158 			ret = btf_id_allow_sleepable(btf_id, addr, prog, btf);
19159 			if (ret) {
19160 				module_put(mod);
19161 				bpf_log(log, "%s is not sleepable\n", tname);
19162 				return ret;
19163 			}
19164 		} else if (prog->expected_attach_type == BPF_MODIFY_RETURN) {
19165 			if (tgt_prog) {
19166 				module_put(mod);
19167 				bpf_log(log, "can't modify return codes of BPF programs\n");
19168 				return -EINVAL;
19169 			}
19170 			ret = -EINVAL;
19171 			if (btf_kfunc_is_modify_return(btf, btf_id, prog) ||
19172 			    !check_attach_modify_return(addr, tname))
19173 				ret = 0;
19174 			if (ret) {
19175 				module_put(mod);
19176 				bpf_log(log, "%s() is not modifiable\n", tname);
19177 				return ret;
19178 			}
19179 		}
19180 
19181 		break;
19182 	}
19183 	tgt_info->tgt_addr = addr;
19184 	tgt_info->tgt_name = tname;
19185 	tgt_info->tgt_type = t;
19186 	tgt_info->tgt_mod = mod;
19187 	return 0;
19188 }
19189 
19190 BTF_SET_START(btf_id_deny)
19191 BTF_ID_UNUSED
19192 #ifdef CONFIG_SMP
19193 BTF_ID(func, ___migrate_enable)
19194 BTF_ID(func, migrate_disable)
19195 BTF_ID(func, migrate_enable)
19196 #endif
19197 #if !defined CONFIG_PREEMPT_RCU && !defined CONFIG_TINY_RCU
19198 BTF_ID(func, rcu_read_unlock_strict)
19199 #endif
19200 #if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_TRACE_PREEMPT_TOGGLE)
19201 BTF_ID(func, preempt_count_add)
19202 BTF_ID(func, preempt_count_sub)
19203 #endif
19204 #ifdef CONFIG_PREEMPT_RCU
19205 BTF_ID(func, __rcu_read_lock)
19206 BTF_ID(func, __rcu_read_unlock)
19207 #endif
19208 BTF_SET_END(btf_id_deny)
19209 
19210 /* fexit and fmod_ret can't be used to attach to __noreturn functions.
19211  * Currently, we must manually list all __noreturn functions here. Once a more
19212  * robust solution is implemented, this workaround can be removed.
19213  */
19214 BTF_SET_START(noreturn_deny)
19215 #ifdef CONFIG_IA32_EMULATION
19216 BTF_ID(func, __ia32_sys_exit)
19217 BTF_ID(func, __ia32_sys_exit_group)
19218 #endif
19219 #ifdef CONFIG_KUNIT
19220 BTF_ID(func, __kunit_abort)
19221 BTF_ID(func, kunit_try_catch_throw)
19222 #endif
19223 #ifdef CONFIG_MODULES
19224 BTF_ID(func, __module_put_and_kthread_exit)
19225 #endif
19226 #ifdef CONFIG_X86_64
19227 BTF_ID(func, __x64_sys_exit)
19228 BTF_ID(func, __x64_sys_exit_group)
19229 #endif
19230 BTF_ID(func, do_exit)
19231 BTF_ID(func, do_group_exit)
19232 BTF_ID(func, kthread_complete_and_exit)
19233 BTF_ID(func, make_task_dead)
19234 BTF_SET_END(noreturn_deny)
19235 
19236 static bool can_be_sleepable(struct bpf_prog *prog)
19237 {
19238 	if (prog->type == BPF_PROG_TYPE_TRACING) {
19239 		switch (prog->expected_attach_type) {
19240 		case BPF_TRACE_FENTRY:
19241 		case BPF_TRACE_FEXIT:
19242 		case BPF_MODIFY_RETURN:
19243 		case BPF_TRACE_ITER:
19244 		case BPF_TRACE_FSESSION:
19245 		case BPF_TRACE_RAW_TP:
19246 		case BPF_TRACE_FENTRY_MULTI:
19247 		case BPF_TRACE_FEXIT_MULTI:
19248 		case BPF_TRACE_FSESSION_MULTI:
19249 			return true;
19250 		default:
19251 			return false;
19252 		}
19253 	}
19254 	if (prog->type == BPF_PROG_TYPE_LSM)
19255 		return prog->expected_attach_type != BPF_LSM_CGROUP;
19256 
19257 	return prog->type == BPF_PROG_TYPE_KPROBE /* only for uprobes */ ||
19258 	       prog->type == BPF_PROG_TYPE_STRUCT_OPS ||
19259 	       prog->type == BPF_PROG_TYPE_RAW_TRACEPOINT ||
19260 	       prog->type == BPF_PROG_TYPE_TRACEPOINT;
19261 }
19262 
19263 static int check_attach_btf_id(struct bpf_verifier_env *env)
19264 {
19265 	struct bpf_prog *prog = env->prog;
19266 	struct bpf_prog *tgt_prog = prog->aux->dst_prog;
19267 	struct bpf_attach_target_info tgt_info = {};
19268 	u32 btf_id = prog->aux->attach_btf_id;
19269 	struct bpf_trampoline *tr;
19270 	int ret;
19271 	u64 key;
19272 
19273 	if (prog->type == BPF_PROG_TYPE_SYSCALL) {
19274 		if (prog->sleepable)
19275 			/* attach_btf_id checked to be zero already */
19276 			return 0;
19277 		verbose(env, "Syscall programs can only be sleepable\n");
19278 		return -EINVAL;
19279 	}
19280 
19281 	if (prog->sleepable && !can_be_sleepable(prog)) {
19282 		verbose(env, "Program of this type cannot be sleepable\n");
19283 		return -EINVAL;
19284 	}
19285 
19286 	if (prog->type == BPF_PROG_TYPE_STRUCT_OPS)
19287 		return check_struct_ops_btf_id(env);
19288 
19289 	if (prog->type != BPF_PROG_TYPE_TRACING &&
19290 	    prog->type != BPF_PROG_TYPE_LSM &&
19291 	    prog->type != BPF_PROG_TYPE_EXT)
19292 		return 0;
19293 
19294 	ret = bpf_check_attach_target(&env->log, prog, tgt_prog, btf_id, &tgt_info);
19295 	if (ret)
19296 		return ret;
19297 
19298 	if (tgt_prog && prog->type == BPF_PROG_TYPE_EXT) {
19299 		/* to make freplace equivalent to their targets, they need to
19300 		 * inherit env->ops and expected_attach_type for the rest of the
19301 		 * verification
19302 		 */
19303 		env->ops = bpf_verifier_ops[tgt_prog->type];
19304 		prog->expected_attach_type = tgt_prog->expected_attach_type;
19305 	}
19306 
19307 	/* store info about the attachment target that will be used later */
19308 	prog->aux->attach_func_proto = tgt_info.tgt_type;
19309 	prog->aux->attach_func_name = tgt_info.tgt_name;
19310 	prog->aux->mod = tgt_info.tgt_mod;
19311 
19312 	if (tgt_prog) {
19313 		prog->aux->saved_dst_prog_type = tgt_prog->type;
19314 		prog->aux->saved_dst_attach_type = tgt_prog->expected_attach_type;
19315 	}
19316 
19317 	if (prog->expected_attach_type == BPF_TRACE_RAW_TP) {
19318 		prog->aux->attach_btf_trace = true;
19319 		return 0;
19320 	} else if (prog->expected_attach_type == BPF_TRACE_ITER) {
19321 		return bpf_iter_prog_supported(prog);
19322 	}
19323 
19324 	if (prog->type == BPF_PROG_TYPE_LSM) {
19325 		ret = bpf_lsm_verify_prog(&env->log, prog);
19326 		if (ret < 0)
19327 			return ret;
19328 	} else if (prog->type == BPF_PROG_TYPE_TRACING &&
19329 		   btf_id_set_contains(&btf_id_deny, btf_id)) {
19330 		verbose(env, "Attaching tracing programs to function '%s' is rejected.\n",
19331 			tgt_info.tgt_name);
19332 		return -EINVAL;
19333 	} else if ((prog->expected_attach_type == BPF_TRACE_FEXIT ||
19334 		   prog->expected_attach_type == BPF_TRACE_FSESSION ||
19335 		   prog->expected_attach_type == BPF_TRACE_FSESSION_MULTI ||
19336 		   prog->expected_attach_type == BPF_MODIFY_RETURN) &&
19337 		   btf_id_set_contains(&noreturn_deny, btf_id)) {
19338 		verbose(env, "Attaching fexit/fsession/fmod_ret to __noreturn function '%s' is rejected.\n",
19339 			tgt_info.tgt_name);
19340 		return -EINVAL;
19341 	}
19342 
19343 	/*
19344 	 * We don't get trampoline for tracing_multi programs at this point,
19345 	 * it's done when tracing_multi link is created.
19346 	 */
19347 	if (prog->type == BPF_PROG_TYPE_TRACING &&
19348 	    is_tracing_multi(prog->expected_attach_type))
19349 		return 0;
19350 
19351 	key = bpf_trampoline_compute_key(tgt_prog, prog->aux->attach_btf, btf_id);
19352 	tr = bpf_trampoline_get(key, &tgt_info);
19353 	if (!tr)
19354 		return -ENOMEM;
19355 
19356 	if (tgt_prog && tgt_prog->aux->tail_call_reachable)
19357 		tr->flags = BPF_TRAMP_F_TAIL_CALL_CTX;
19358 
19359 	prog->aux->dst_trampoline = tr;
19360 	return 0;
19361 }
19362 
19363 int bpf_check_attach_btf_id_multi(struct btf *btf, struct bpf_prog *prog, u32 btf_id,
19364 				  struct bpf_attach_target_info *tgt_info)
19365 {
19366 	const struct btf_type *t;
19367 	unsigned long addr;
19368 	const char *tname;
19369 	int err;
19370 
19371 	if (!btf_id || !btf)
19372 		return -EINVAL;
19373 
19374 	/* Check noreturn attachment. */
19375 	if ((prog->expected_attach_type == BPF_TRACE_FEXIT_MULTI ||
19376 	     prog->expected_attach_type == BPF_TRACE_FSESSION_MULTI) &&
19377 	     btf_id_set_contains(&noreturn_deny, btf_id))
19378 		return -EINVAL;
19379 	/* Check denied attachment. */
19380 	if (btf_id_set_contains(&btf_id_deny, btf_id))
19381 		return -EINVAL;
19382 
19383 	/* Check and get function target data. */
19384 	t = btf_type_by_id(btf, btf_id);
19385 	if (!t)
19386 		return -EINVAL;
19387 	tname = btf_name_by_offset(btf, t->name_off);
19388 	if (!tname)
19389 		return -EINVAL;
19390 	if (!btf_type_is_func(t))
19391 		return -EINVAL;
19392 	t = btf_type_by_id(btf, t->type);
19393 	if (!btf_type_is_func_proto(t))
19394 		return -EINVAL;
19395 	err = btf_distill_func_proto(NULL, btf, t, tname, &tgt_info->fmodel);
19396 	if (err < 0)
19397 		return err;
19398 	if (btf_is_module(btf)) {
19399 		/* The bpf program already holds reference to module. */
19400 		if (WARN_ON_ONCE(!prog->aux->mod))
19401 			return -EINVAL;
19402 		addr = find_kallsyms_symbol_value(prog->aux->mod, tname);
19403 	} else {
19404 		addr = kallsyms_lookup_name(tname);
19405 	}
19406 	if (!addr || !ftrace_location(addr))
19407 		return -ENOENT;
19408 
19409 	/* Check sleepable program attachment. */
19410 	if (prog->sleepable) {
19411 		err = btf_id_allow_sleepable(btf_id, addr, prog, btf);
19412 		if (err)
19413 			return err;
19414 	}
19415 	tgt_info->tgt_addr = addr;
19416 	return 0;
19417 }
19418 
19419 struct btf *bpf_get_btf_vmlinux(void)
19420 {
19421 	if (!btf_vmlinux && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) {
19422 		mutex_lock(&bpf_verifier_lock);
19423 		if (!btf_vmlinux)
19424 			btf_vmlinux = btf_parse_vmlinux();
19425 		mutex_unlock(&bpf_verifier_lock);
19426 	}
19427 	return btf_vmlinux;
19428 }
19429 
19430 /*
19431  * The add_fd_from_fd_array() is executed only if fd_array_cnt is non-zero. In
19432  * this case expect that every file descriptor in the array is either a map or
19433  * a BTF. Everything else is considered to be trash.
19434  */
19435 static int add_fd_from_fd_array(struct bpf_verifier_env *env, int fd)
19436 {
19437 	struct bpf_map *map;
19438 	struct btf *btf;
19439 	CLASS(fd, f)(fd);
19440 	int err;
19441 
19442 	map = __bpf_map_get(f);
19443 	if (!IS_ERR(map)) {
19444 		err = __add_used_map(env, map);
19445 		if (err < 0)
19446 			return err;
19447 		return 0;
19448 	}
19449 
19450 	btf = __btf_get_by_fd(f);
19451 	if (!IS_ERR(btf)) {
19452 		btf_get(btf);
19453 		return __add_used_btf(env, btf);
19454 	}
19455 
19456 	verbose(env, "fd %d is not pointing to valid bpf_map or btf\n", fd);
19457 	return PTR_ERR(map);
19458 }
19459 
19460 static int process_fd_array(struct bpf_verifier_env *env, union bpf_attr *attr, bpfptr_t uattr)
19461 {
19462 	size_t size = sizeof(int);
19463 	int ret;
19464 	int fd;
19465 	u32 i;
19466 
19467 	env->fd_array = make_bpfptr(attr->fd_array, uattr.is_kernel);
19468 
19469 	/*
19470 	 * The only difference between old (no fd_array_cnt is given) and new
19471 	 * APIs is that in the latter case the fd_array is expected to be
19472 	 * continuous and is scanned for map fds right away
19473 	 */
19474 	if (!attr->fd_array_cnt)
19475 		return 0;
19476 
19477 	/* Check for integer overflow */
19478 	if (attr->fd_array_cnt >= (U32_MAX / size)) {
19479 		verbose(env, "fd_array_cnt is too big (%u)\n", attr->fd_array_cnt);
19480 		return -EINVAL;
19481 	}
19482 
19483 	for (i = 0; i < attr->fd_array_cnt; i++) {
19484 		if (copy_from_bpfptr_offset(&fd, env->fd_array, i * size, size))
19485 			return -EFAULT;
19486 
19487 		ret = add_fd_from_fd_array(env, fd);
19488 		if (ret)
19489 			return ret;
19490 	}
19491 
19492 	return 0;
19493 }
19494 
19495 /* replace a generic kfunc with a specialized version if necessary */
19496 static int specialize_kfunc(struct bpf_verifier_env *env, struct bpf_kfunc_desc *desc, int insn_idx)
19497 {
19498 	struct bpf_prog *prog = env->prog;
19499 	bool seen_direct_write;
19500 	void *xdp_kfunc;
19501 	bool is_rdonly;
19502 	u32 func_id = desc->func_id;
19503 	u16 offset = desc->offset;
19504 	unsigned long addr = desc->addr;
19505 
19506 	if (offset) /* return if module BTF is used */
19507 		return 0;
19508 
19509 	if (bpf_dev_bound_kfunc_id(func_id)) {
19510 		xdp_kfunc = bpf_dev_bound_resolve_kfunc(prog, func_id);
19511 		if (xdp_kfunc)
19512 			addr = (unsigned long)xdp_kfunc;
19513 		/* fallback to default kfunc when not supported by netdev */
19514 	} else if (func_id == special_kfunc_list[KF_bpf_dynptr_from_skb]) {
19515 		seen_direct_write = env->seen_direct_write;
19516 		is_rdonly = !may_access_direct_pkt_data(env, NULL, BPF_WRITE);
19517 
19518 		if (is_rdonly)
19519 			addr = (unsigned long)bpf_dynptr_from_skb_rdonly;
19520 
19521 		/* restore env->seen_direct_write to its original value, since
19522 		 * may_access_direct_pkt_data mutates it
19523 		 */
19524 		env->seen_direct_write = seen_direct_write;
19525 	} else if (func_id == special_kfunc_list[KF_bpf_set_dentry_xattr]) {
19526 		if (bpf_lsm_has_d_inode_locked(prog))
19527 			addr = (unsigned long)bpf_set_dentry_xattr_locked;
19528 	} else if (func_id == special_kfunc_list[KF_bpf_remove_dentry_xattr]) {
19529 		if (bpf_lsm_has_d_inode_locked(prog))
19530 			addr = (unsigned long)bpf_remove_dentry_xattr_locked;
19531 	} else if (func_id == special_kfunc_list[KF_bpf_dynptr_from_file]) {
19532 		if (!env->insn_aux_data[insn_idx].non_sleepable)
19533 			addr = (unsigned long)bpf_dynptr_from_file_sleepable;
19534 	} else if (func_id == special_kfunc_list[KF_bpf_arena_alloc_pages]) {
19535 		if (env->insn_aux_data[insn_idx].non_sleepable)
19536 			addr = (unsigned long)bpf_arena_alloc_pages_non_sleepable;
19537 	} else if (func_id == special_kfunc_list[KF_bpf_arena_free_pages]) {
19538 		if (env->insn_aux_data[insn_idx].non_sleepable)
19539 			addr = (unsigned long)bpf_arena_free_pages_non_sleepable;
19540 	}
19541 	desc->addr = addr;
19542 	return 0;
19543 }
19544 
19545 static void __fixup_collection_insert_kfunc(struct bpf_insn_aux_data *insn_aux,
19546 					    u16 struct_meta_reg,
19547 					    u16 node_offset_reg,
19548 					    struct bpf_insn *insn,
19549 					    struct bpf_insn *insn_buf,
19550 					    int *cnt)
19551 {
19552 	struct btf_struct_meta *kptr_struct_meta = insn_aux->kptr_struct_meta;
19553 	struct bpf_insn addr[2] = { BPF_LD_IMM64(struct_meta_reg, (long)kptr_struct_meta) };
19554 
19555 	insn_buf[0] = addr[0];
19556 	insn_buf[1] = addr[1];
19557 	insn_buf[2] = BPF_MOV64_IMM(node_offset_reg, insn_aux->insert_off);
19558 	insn_buf[3] = *insn;
19559 	*cnt = 4;
19560 }
19561 
19562 int bpf_fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
19563 		     struct bpf_insn *insn_buf, int insn_idx, int *cnt)
19564 {
19565 	struct bpf_kfunc_desc *desc;
19566 	int err;
19567 
19568 	if (!insn->imm) {
19569 		verbose(env, "invalid kernel function call not eliminated in verifier pass\n");
19570 		return -EINVAL;
19571 	}
19572 
19573 	*cnt = 0;
19574 
19575 	/* insn->imm has the btf func_id. Replace it with an offset relative to
19576 	 * __bpf_call_base, unless the JIT needs to call functions that are
19577 	 * further than 32 bits away (bpf_jit_supports_far_kfunc_call()).
19578 	 */
19579 	desc = find_kfunc_desc(env->prog, insn->imm, insn->off);
19580 	if (!desc) {
19581 		verifier_bug(env, "kernel function descriptor not found for func_id %u",
19582 			     insn->imm);
19583 		return -EFAULT;
19584 	}
19585 
19586 	err = specialize_kfunc(env, desc, insn_idx);
19587 	if (err)
19588 		return err;
19589 
19590 	if (!bpf_jit_supports_far_kfunc_call())
19591 		insn->imm = BPF_CALL_IMM(desc->addr);
19592 
19593 	if (is_bpf_obj_new_kfunc(desc->func_id) || is_bpf_percpu_obj_new_kfunc(desc->func_id)) {
19594 		struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
19595 		struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) };
19596 		u64 obj_new_size = env->insn_aux_data[insn_idx].obj_new_size;
19597 
19598 		if (is_bpf_percpu_obj_new_kfunc(desc->func_id) && kptr_struct_meta) {
19599 			verifier_bug(env, "NULL kptr_struct_meta expected at insn_idx %d",
19600 				     insn_idx);
19601 			return -EFAULT;
19602 		}
19603 
19604 		insn_buf[0] = BPF_MOV64_IMM(BPF_REG_1, obj_new_size);
19605 		insn_buf[1] = addr[0];
19606 		insn_buf[2] = addr[1];
19607 		insn_buf[3] = *insn;
19608 		*cnt = 4;
19609 	} else if (is_bpf_obj_drop_kfunc(desc->func_id) ||
19610 		   is_bpf_percpu_obj_drop_kfunc(desc->func_id) ||
19611 		   is_bpf_refcount_acquire_kfunc(desc->func_id)) {
19612 		struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
19613 		struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) };
19614 
19615 		if (is_bpf_percpu_obj_drop_kfunc(desc->func_id) && kptr_struct_meta) {
19616 			verifier_bug(env, "NULL kptr_struct_meta expected at insn_idx %d",
19617 				     insn_idx);
19618 			return -EFAULT;
19619 		}
19620 
19621 		if (is_bpf_refcount_acquire_kfunc(desc->func_id) && !kptr_struct_meta) {
19622 			verifier_bug(env, "kptr_struct_meta expected at insn_idx %d",
19623 				     insn_idx);
19624 			return -EFAULT;
19625 		}
19626 
19627 		insn_buf[0] = addr[0];
19628 		insn_buf[1] = addr[1];
19629 		insn_buf[2] = *insn;
19630 		*cnt = 3;
19631 	} else if (is_bpf_list_push_kfunc(desc->func_id) ||
19632 		   is_bpf_rbtree_add_kfunc(desc->func_id)) {
19633 		struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
19634 		int struct_meta_reg = BPF_REG_3;
19635 		int node_offset_reg = BPF_REG_4;
19636 
19637 		/* list_add/rbtree_add have an extra arg (prev/less),
19638 		 * so args-to-fixup are in diff regs.
19639 		 */
19640 		if (desc->func_id == special_kfunc_list[KF_bpf_list_add] ||
19641 		    is_bpf_rbtree_add_kfunc(desc->func_id)) {
19642 			struct_meta_reg = BPF_REG_4;
19643 			node_offset_reg = BPF_REG_5;
19644 		}
19645 
19646 		if (!kptr_struct_meta) {
19647 			verifier_bug(env, "kptr_struct_meta expected at insn_idx %d",
19648 				     insn_idx);
19649 			return -EFAULT;
19650 		}
19651 
19652 		__fixup_collection_insert_kfunc(&env->insn_aux_data[insn_idx], struct_meta_reg,
19653 						node_offset_reg, insn, insn_buf, cnt);
19654 	} else if (desc->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx] ||
19655 		   desc->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
19656 		insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1);
19657 		*cnt = 1;
19658 	} else if (desc->func_id == special_kfunc_list[KF_bpf_session_is_return] &&
19659 		   (env->prog->expected_attach_type == BPF_TRACE_FSESSION ||
19660 		    env->prog->expected_attach_type == BPF_TRACE_FSESSION_MULTI)) {
19661 
19662 		/*
19663 		 * inline the bpf_session_is_return() for fsession:
19664 		 *   bool bpf_session_is_return(void *ctx)
19665 		 *   {
19666 		 *       return (((u64 *)ctx)[-1] >> BPF_TRAMP_IS_RETURN_SHIFT) & 1;
19667 		 *   }
19668 		 */
19669 		insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
19670 		insn_buf[1] = BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, BPF_TRAMP_IS_RETURN_SHIFT);
19671 		insn_buf[2] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1);
19672 		*cnt = 3;
19673 	} else if (desc->func_id == special_kfunc_list[KF_bpf_session_cookie] &&
19674 		   (env->prog->expected_attach_type == BPF_TRACE_FSESSION ||
19675 		    env->prog->expected_attach_type == BPF_TRACE_FSESSION_MULTI)) {
19676 		/*
19677 		 * inline bpf_session_cookie() for fsession:
19678 		 *   __u64 *bpf_session_cookie(void *ctx)
19679 		 *   {
19680 		 *       u64 off = (((u64 *)ctx)[-1] >> BPF_TRAMP_COOKIE_INDEX_SHIFT) & 0xFF;
19681 		 *       return &((u64 *)ctx)[-off];
19682 		 *   }
19683 		 */
19684 		insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
19685 		insn_buf[1] = BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, BPF_TRAMP_COOKIE_INDEX_SHIFT);
19686 		insn_buf[2] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xFF);
19687 		insn_buf[3] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 3);
19688 		insn_buf[4] = BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1);
19689 		insn_buf[5] = BPF_ALU64_IMM(BPF_NEG, BPF_REG_0, 0);
19690 		*cnt = 6;
19691 	}
19692 
19693 	if (env->insn_aux_data[insn_idx].arg_prog) {
19694 		u32 regno = env->insn_aux_data[insn_idx].arg_prog;
19695 		struct bpf_insn ld_addrs[2] = { BPF_LD_IMM64(regno, (long)env->prog->aux) };
19696 		int idx = *cnt;
19697 
19698 		insn_buf[idx++] = ld_addrs[0];
19699 		insn_buf[idx++] = ld_addrs[1];
19700 		insn_buf[idx++] = *insn;
19701 		*cnt = idx;
19702 	}
19703 	return 0;
19704 }
19705 
19706 int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr,
19707 	      struct bpf_log_attr *attr_log)
19708 {
19709 	u64 start_time = ktime_get_ns();
19710 	struct bpf_verifier_env *env;
19711 	int i, len, ret = -EINVAL, err;
19712 	bool is_priv;
19713 
19714 	BTF_TYPE_EMIT(enum bpf_features);
19715 
19716 	/* no program is valid */
19717 	if (ARRAY_SIZE(bpf_verifier_ops) == 0)
19718 		return -EINVAL;
19719 
19720 	/* 'struct bpf_verifier_env' can be global, but since it's not small,
19721 	 * allocate/free it every time bpf_check() is called
19722 	 */
19723 	env = kvzalloc_obj(struct bpf_verifier_env, GFP_KERNEL_ACCOUNT);
19724 	if (!env)
19725 		return -ENOMEM;
19726 
19727 	env->bt.env = env;
19728 
19729 	len = (*prog)->len;
19730 	env->insn_aux_data =
19731 		vzalloc(array_size(sizeof(struct bpf_insn_aux_data), len));
19732 	ret = -ENOMEM;
19733 	if (!env->insn_aux_data)
19734 		goto err_free_env;
19735 	for (i = 0; i < len; i++)
19736 		env->insn_aux_data[i].orig_idx = i;
19737 	env->succ = bpf_iarray_realloc(NULL, 2);
19738 	if (!env->succ)
19739 		goto err_free_env;
19740 	env->prog = *prog;
19741 	env->ops = bpf_verifier_ops[env->prog->type];
19742 
19743 	env->allow_ptr_leaks = bpf_allow_ptr_leaks(env->prog->aux->token);
19744 	env->allow_uninit_stack = bpf_allow_uninit_stack(env->prog->aux->token);
19745 	env->bypass_spec_v1 = bpf_bypass_spec_v1(env->prog->aux->token);
19746 	env->bypass_spec_v4 = bpf_bypass_spec_v4(env->prog->aux->token);
19747 	env->bpf_capable = is_priv = bpf_token_capable(env->prog->aux->token, CAP_BPF);
19748 
19749 	bpf_get_btf_vmlinux();
19750 
19751 	/* grab the mutex to protect few globals used by verifier */
19752 	if (!is_priv)
19753 		mutex_lock(&bpf_verifier_lock);
19754 
19755 	/* user could have requested verbose verifier output
19756 	 * and supplied buffer to store the verification trace
19757 	 */
19758 	ret = bpf_vlog_init(&env->log, attr_log->level, attr_log->ubuf, attr_log->size);
19759 	if (ret)
19760 		goto err_unlock;
19761 
19762 	ret = process_fd_array(env, attr, uattr);
19763 	if (ret)
19764 		goto skip_full_check;
19765 
19766 	mark_verifier_state_clean(env);
19767 
19768 	if (IS_ERR(btf_vmlinux)) {
19769 		/* Either gcc or pahole or kernel are broken. */
19770 		verbose(env, "in-kernel BTF is malformed\n");
19771 		ret = PTR_ERR(btf_vmlinux);
19772 		goto skip_full_check;
19773 	}
19774 
19775 	env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT);
19776 	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
19777 		env->strict_alignment = true;
19778 	if (attr->prog_flags & BPF_F_ANY_ALIGNMENT)
19779 		env->strict_alignment = false;
19780 
19781 	if (is_priv)
19782 		env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ;
19783 	env->test_reg_invariants = attr->prog_flags & BPF_F_TEST_REG_INVARIANTS;
19784 
19785 	env->explored_states = kvzalloc_objs(struct list_head,
19786 					     state_htab_size(env),
19787 					     GFP_KERNEL_ACCOUNT);
19788 	ret = -ENOMEM;
19789 	if (!env->explored_states)
19790 		goto skip_full_check;
19791 
19792 	for (i = 0; i < state_htab_size(env); i++)
19793 		INIT_LIST_HEAD(&env->explored_states[i]);
19794 	INIT_LIST_HEAD(&env->free_list);
19795 
19796 	ret = bpf_check_btf_info_early(env, attr, uattr);
19797 	if (ret < 0)
19798 		goto skip_full_check;
19799 
19800 	ret = add_subprog_and_kfunc(env);
19801 	if (ret < 0)
19802 		goto skip_full_check;
19803 
19804 	ret = check_subprogs(env);
19805 	if (ret < 0)
19806 		goto skip_full_check;
19807 
19808 	ret = bpf_check_btf_info(env, attr, uattr);
19809 	if (ret < 0)
19810 		goto skip_full_check;
19811 
19812 	ret = check_and_resolve_insns(env);
19813 	if (ret < 0)
19814 		goto skip_full_check;
19815 
19816 	if (bpf_prog_is_offloaded(env->prog->aux)) {
19817 		ret = bpf_prog_offload_verifier_prep(env->prog);
19818 		if (ret)
19819 			goto skip_full_check;
19820 	}
19821 
19822 	ret = bpf_check_cfg(env);
19823 	if (ret < 0)
19824 		goto skip_full_check;
19825 
19826 	ret = bpf_compute_postorder(env);
19827 	if (ret < 0)
19828 		goto skip_full_check;
19829 
19830 	ret = bpf_stack_liveness_init(env);
19831 	if (ret)
19832 		goto skip_full_check;
19833 
19834 	ret = check_attach_btf_id(env);
19835 	if (ret)
19836 		goto skip_full_check;
19837 
19838 	ret = bpf_compute_const_regs(env);
19839 	if (ret < 0)
19840 		goto skip_full_check;
19841 
19842 	ret = bpf_prune_dead_branches(env);
19843 	if (ret < 0)
19844 		goto skip_full_check;
19845 
19846 	ret = sort_subprogs_topo(env);
19847 	if (ret < 0)
19848 		goto skip_full_check;
19849 
19850 	ret = bpf_compute_scc(env);
19851 	if (ret < 0)
19852 		goto skip_full_check;
19853 
19854 	ret = bpf_compute_live_registers(env);
19855 	if (ret < 0)
19856 		goto skip_full_check;
19857 
19858 	ret = mark_fastcall_patterns(env);
19859 	if (ret < 0)
19860 		goto skip_full_check;
19861 
19862 	ret = do_check_main(env);
19863 	ret = ret ?: do_check_subprogs(env);
19864 
19865 	if (ret == 0 && bpf_prog_is_offloaded(env->prog->aux))
19866 		ret = bpf_prog_offload_finalize(env);
19867 
19868 skip_full_check:
19869 	kvfree(env->explored_states);
19870 
19871 	/* might decrease stack depth, keep it before passes that
19872 	 * allocate additional slots.
19873 	 */
19874 	if (ret == 0)
19875 		ret = bpf_remove_fastcall_spills_fills(env);
19876 
19877 	if (ret == 0)
19878 		ret = check_max_stack_depth(env);
19879 
19880 	/* instruction rewrites happen after this point */
19881 	if (ret == 0)
19882 		ret = bpf_optimize_bpf_loop(env);
19883 
19884 	if (is_priv) {
19885 		if (ret == 0)
19886 			bpf_opt_hard_wire_dead_code_branches(env);
19887 		if (ret == 0)
19888 			ret = bpf_opt_remove_dead_code(env);
19889 		if (ret == 0)
19890 			ret = bpf_opt_remove_nops(env);
19891 	} else {
19892 		if (ret == 0)
19893 			sanitize_dead_code(env);
19894 	}
19895 
19896 	if (ret == 0)
19897 		/* program is valid, convert *(u32*)(ctx + off) accesses */
19898 		ret = bpf_convert_ctx_accesses(env);
19899 
19900 	if (ret == 0)
19901 		ret = bpf_do_misc_fixups(env);
19902 
19903 	/* do 32-bit optimization after insn patching has done so those patched
19904 	 * insns could be handled correctly.
19905 	 */
19906 	if (ret == 0 && !bpf_prog_is_offloaded(env->prog->aux)) {
19907 		ret = bpf_opt_subreg_zext_lo32_rnd_hi32(env, attr);
19908 		env->prog->aux->verifier_zext = bpf_jit_needs_zext() ? !ret
19909 								     : false;
19910 	}
19911 
19912 	if (ret == 0)
19913 		ret = bpf_fixup_call_args(env);
19914 
19915 	env->verification_time = ktime_get_ns() - start_time;
19916 	print_verification_stats(env);
19917 	env->prog->aux->verified_insns = env->insn_processed;
19918 
19919 	/* preserve original error even if log finalization is successful */
19920 	err = bpf_log_attr_finalize(attr_log, &env->log);
19921 	if (err)
19922 		ret = err;
19923 
19924 	if (ret)
19925 		goto err_release_maps;
19926 
19927 	if (env->used_map_cnt) {
19928 		/* if program passed verifier, update used_maps in bpf_prog_info */
19929 		env->prog->aux->used_maps = kmalloc_objs(env->used_maps[0],
19930 							 env->used_map_cnt,
19931 							 GFP_KERNEL_ACCOUNT);
19932 
19933 		if (!env->prog->aux->used_maps) {
19934 			ret = -ENOMEM;
19935 			goto err_release_maps;
19936 		}
19937 
19938 		memcpy(env->prog->aux->used_maps, env->used_maps,
19939 		       sizeof(env->used_maps[0]) * env->used_map_cnt);
19940 		env->prog->aux->used_map_cnt = env->used_map_cnt;
19941 	}
19942 	if (env->used_btf_cnt) {
19943 		/* if program passed verifier, update used_btfs in bpf_prog_aux */
19944 		env->prog->aux->used_btfs = kmalloc_objs(env->used_btfs[0],
19945 							 env->used_btf_cnt,
19946 							 GFP_KERNEL_ACCOUNT);
19947 		if (!env->prog->aux->used_btfs) {
19948 			ret = -ENOMEM;
19949 			goto err_release_maps;
19950 		}
19951 
19952 		memcpy(env->prog->aux->used_btfs, env->used_btfs,
19953 		       sizeof(env->used_btfs[0]) * env->used_btf_cnt);
19954 		env->prog->aux->used_btf_cnt = env->used_btf_cnt;
19955 	}
19956 	if (env->used_map_cnt || env->used_btf_cnt) {
19957 		/* program is valid. Convert pseudo bpf_ld_imm64 into generic
19958 		 * bpf_ld_imm64 instructions
19959 		 */
19960 		convert_pseudo_ld_imm64(env);
19961 	}
19962 
19963 	adjust_btf_func(env);
19964 
19965 	/* extension progs temporarily inherit the attach_type of their targets
19966 	   for verification purposes, so set it back to zero before returning
19967 	 */
19968 	if (env->prog->type == BPF_PROG_TYPE_EXT)
19969 		env->prog->expected_attach_type = 0;
19970 
19971 	env->prog = __bpf_prog_select_runtime(env, env->prog, &ret);
19972 
19973 err_release_maps:
19974 	if (ret)
19975 		release_insn_arrays(env);
19976 	if (!env->prog->aux->used_maps)
19977 		/* if we didn't copy map pointers into bpf_prog_info, release
19978 		 * them now. Otherwise free_used_maps() will release them.
19979 		 */
19980 		release_maps(env);
19981 	if (!env->prog->aux->used_btfs)
19982 		release_btfs(env);
19983 
19984 	*prog = env->prog;
19985 
19986 	module_put(env->attach_btf_mod);
19987 err_unlock:
19988 	if (!is_priv)
19989 		mutex_unlock(&bpf_verifier_lock);
19990 	bpf_clear_insn_aux_data(env, 0, env->prog->len);
19991 	vfree(env->insn_aux_data);
19992 err_free_env:
19993 	bpf_stack_liveness_free(env);
19994 	kvfree(env->cfg.insn_postorder);
19995 	kvfree(env->scc_info);
19996 	kvfree(env->succ);
19997 	kvfree(env->gotox_tmp_buf);
19998 	kvfree(env);
19999 	return ret;
20000 }
20001